| field | value |
|---|---|
| author | Uday Bondhugula <bondhugula@google.com> (2019-02-06 21:54:18 -0800) |
| committer | jpienaar <jpienaar@google.com> (2019-03-29 16:19:33 -0700) |
| commit | 4ba8c9147d04d82d629dde4730e1dd5d4ae4123d (patch) |
| tree | b16681ef2a8f2327993d1f4b75596ce100732403 /mlir |
| parent | 99fee0b181106a0213501800c6076aec95afa46c (diff) |
| download | bcm5719-llvm-4ba8c9147d04d82d629dde4730e1dd5d4ae4123d.tar.gz, bcm5719-llvm-4ba8c9147d04d82d629dde4730e1dd5d4ae4123d.zip |
Automated rollback of changelist 232717775.
PiperOrigin-RevId: 232807986
Diffstat (limited to 'mlir')
62 files changed, 1036 insertions, 1041 deletions
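This rollback restores the bare `for` spelling for the affine dialect's loop operation (the reverted changelist had renamed it to `affine.for`); `affine.apply` and `affine.if` keep their prefixed names, and the dialect's operation-name prefix is reset to the empty string in AffineOps.cpp. As a quick orientation before the full diff, the snippet below shows the restored spelling. It is adapted from the updated `Affine.md` example in this commit, so treat it as an illustrative excerpt: `#map57`, `@F1`, and `@F2` are assumed to be defined elsewhere.

```mlir
// Affine loops after this rollback: plain `for`, with `affine.apply`
// still carrying the dialect prefix.
func @simple_example(%A: memref<?x?xf32>, %B: memref<?x?xf32>) {
  %N = dim %A, 0 : memref<?x?xf32>
  for %i = 0 to %N step 1 {
    for %j = 0 to %N {   // implicitly steps by 1
      %0 = affine.apply #map57(%j)[%N]
      %tmp = call @F1(%A, %i, %0) : (memref<?x?xf32>, index, index) -> (f32)
      call @F2(%tmp, %B, %i, %0) : (f32, memref<?x?xf32>, index, index) -> ()
    }
  }
  return
}
```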
diff --git a/mlir/g3doc/Dialects/Affine.md b/mlir/g3doc/Dialects/Affine.md index 0c69c60cbe9..55d26f0d956 100644 --- a/mlir/g3doc/Dialects/Affine.md +++ b/mlir/g3doc/Dialects/Affine.md @@ -15,7 +15,7 @@ loops and if instructions), the result of a [`affine.apply` operation](#'affine.apply'-operation) that recursively takes as arguments any symbolic identifiers. Dimensions may be bound not only to anything that a symbol is bound to, but also to induction variables of enclosing -[`affine.for` operations](#'affine.for'-operation), and the result of an +[`for` operations](#'for'-operation), and the result of an [`affine.apply` operation](#'affine.apply'-operation) (which recursively may use other dimensions and symbols). @@ -47,12 +47,12 @@ Example: %2 = affine.apply (i)[s0] -> (i+s0) (%42)[%n] ``` -#### 'affine.for' operation {#'affine.for'-operation} +#### 'for' operation {#'for'-operation} Syntax: ``` {.ebnf} -operation ::= `affine.for` ssa-id `=` lower-bound `to` upper-bound +operation ::= `for` ssa-id `=` lower-bound `to` upper-bound (`step` integer-literal)? `{` inst* `}` lower-bound ::= `max`? affine-map dim-and-symbol-use-list | shorthand-bound @@ -60,17 +60,17 @@ upper-bound ::= `min`? affine-map dim-and-symbol-use-list | shorthand-bound shorthand-bound ::= ssa-id | `-`? integer-literal ``` -The `affine.for` operation represents an affine loop nest, defining an SSA value -for its induction variable. This SSA value always has type +The `for` operation represents an affine loop nest, defining an SSA value for +its induction variable. This SSA value always has type [`index`](LangRef.md#index-type), which is the size of the machine word. -The `affine.for` operation executes its body a number of times iterating from a -lower bound to an upper bound by a stride. The stride, represented by `step`, is -a positive constant integer which defaults to "1" if not present. The lower and +The `for` operation executes its body a number of times iterating from a lower +bound to an upper bound by a stride. The stride, represented by `step`, is a +positive constant integer which defaults to "1" if not present. The lower and upper bounds specify a half-open range: the range includes the lower bound but does not include theĀ upper bound. -The lower and upper bounds of a `affine.for` operation are represented as an +The lower and upper bounds of a `for` operation are represented as an application of an affine mapping to a list of SSA values passed to the map. The [same restrictions](#restrictions-on-dimensions-and-symbols) hold for these SSA values as for all bindings of SSA values to dimensions and symbols. 
@@ -94,8 +94,8 @@ Example showing reverse iteration of the inner loop: func @simple_example(%A: memref<?x?xf32>, %B: memref<?x?xf32>) { %N = dim %A, 0 : memref<?x?xf32> - affine.for %i = 0 to %N step 1 { - affine.for %j = 0 to %N { // implicitly steps by 1 + for %i = 0 to %N step 1 { + for %j = 0 to %N { // implicitly steps by 1 %0 = affine.apply #map57(%j)[%N] %tmp = call @F1(%A, %i, %0) : (memref<?x?xf32>, index, index)->(f32) call @F2(%tmp, %B, %i, %0) : (f32, memref<?x?xf32>, index, index)->() @@ -130,8 +130,8 @@ Example: #set = (d0, d1)[s0]: (d0 - 10 >= 0, s0 - d0 - 9 >= 0, d1 - 10 >= 0, s0 - d1 - 9 >= 0) func @reduced_domain_example(%A, %X, %N) : (memref<10xi32>, i32, i32) { - affine.for %i = 0 to %N { - affine.for %j = 0 to %N { + for %i = 0 to %N { + for %j = 0 to %N { %0 = affine.apply #map42(%j) %tmp = call @S1(%X, %i, %0) affine.if #set(%i, %j)[%N] { diff --git a/mlir/g3doc/Dialects/SuperVector.md b/mlir/g3doc/Dialects/SuperVector.md index cd540335a52..09beb950e37 100644 --- a/mlir/g3doc/Dialects/SuperVector.md +++ b/mlir/g3doc/Dialects/SuperVector.md @@ -22,9 +22,9 @@ Examples: // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32> and // pad with %f0 to handle the boundary case: %f0 = constant 0.0f : f32 -affine.for %i0 = 0 to %0 { - affine.for %i1 = 0 to %1 step 256 { - affine.for %i2 = 0 to %2 step 32 { +for %i0 = 0 to %0 { + for %i1 = 0 to %1 step 256 { + for %i2 = 0 to %2 step 32 { %v = vector_transfer_read %A, %i0, %i1, %i2, %f0 {permutation_map: (d0, d1, d2) -> (d2, d1)} : (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32> @@ -33,8 +33,8 @@ affine.for %i0 = 0 to %0 { // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into // vector<128xf32>. The underlying implementation will require a 1-D vector // broadcast: -affine.for %i0 = 0 to %0 { - affine.for %i1 = 0 to %1 { +for %i0 = 0 to %0 { + for %i1 = 0 to %1 { %3 = vector_transfer_read %A, %i0, %i1 {permutation_map: (d0, d1) -> (0)} : (memref<?x?xf32>, index, index) -> vector<128xf32> @@ -80,9 +80,9 @@ A notional lowering of vector_transfer_read could generate code resembling: // %expr1, %expr2, %expr3, %expr4 defined before this point %tmp = alloc() : vector<3x4x5xf32> %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>> -affine.for %i = 0 to 3 { - affine.for %j = 0 to 4 { - affine.for %k = 0 to 5 { +for %i = 0 to 3 { + for %j = 0 to 4 { + for %k = 0 to 5 { %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32> store %tmp[%i, %j, %k] : vector<3x4x5xf32> }}} @@ -101,8 +101,8 @@ lowered code would resemble: // %expr1, %expr2, %expr3, %expr4 defined before this point %tmp = alloc() : vector<3x4x5xf32> %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>> -affine.for %i = 0 to 3 { - affine.for %k = 0 to 5 { +for %i = 0 to 3 { + for %k = 0 to 5 { %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32> store %tmp[%i, 0, %k] : vector<3x4x5xf32> }} @@ -129,10 +129,10 @@ Examples: ```mlir {.mlir} // write vector<16x32x64xf32> into the slice `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`: -affine.for %i0 = 0 to %0 { - affine.for %i1 = 0 to %1 step 32 { - affine.for %i2 = 0 to %2 step 64 { - affine.for %i3 = 0 to %3 step 16 { +for %i0 = 0 to %0 { + for %i1 = 0 to %1 step 32 { + for %i2 = 0 to %2 step 64 { + for %i3 = 0 to %3 step 16 { %val = `ssa-value` : vector<16x32x64xf32> vector_transfer_write %val, %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} : diff --git 
a/mlir/g3doc/LangRef.md b/mlir/g3doc/LangRef.md index fdfc43ea39d..3448927d214 100644 --- a/mlir/g3doc/LangRef.md +++ b/mlir/g3doc/LangRef.md @@ -40,7 +40,7 @@ which means that values are defined before use and have scope defined by their dominance relations. Operations may produce zero or more results, and each is a distinct SSA value with its own type defined by the [type system](#type-system). -MLIR incorporates polyhedral compiler concepts, including `affine.for` and +MLIR incorporates polyhedral compiler concepts, including `for` and `affine.if` operations defined by the [affine dialect](Dialects/Affine.md), which model affine loops and affine conditionals. It also includes affine maps integrated into the type system - they are key to the representation of data and @@ -99,10 +99,10 @@ func @multiply(%A: memref<100x?xf32>, %B: memref<?x50xf32>) %C = alloc memref<100x50xf32>() // Multiplication loop nest. - affine.for %i = 0 to 100 { - affine.for %j = 0 to 50 { + for %i = 0 to 100 { + for %j = 0 to 50 { store 0 to %C[%i, %j] : memref<100x50xf32> - affine.for %k = 0 to %n { + for %k = 0 to %n { %a_v = load %A[%i, %k] : memref<100x?xf32> %b_v = load %B[%k, %j] : memref<?x50xf32> %prod = mulf %a_v, %b_v : f32 @@ -1434,7 +1434,7 @@ The arity of indices is the rank of the memref (i.e., if the memref loaded from is of rank 3, then 3 indices are required for the load following the memref identifier). -In an `affine.if` or `affine.for` body, the indices of a load are restricted to +In an `affine.if` or `for` body, the indices of a load are restricted to SSA values bound to surrounding loop induction variables, [symbols](#dimensions-and-symbols), results of a [`constant` operation](#'constant'-operation), or the result of an @@ -1456,7 +1456,7 @@ Example: **Context:** The `load` and `store` instructions are specifically crafted to fully resolve a reference to an element of a memref, and (in affine `affine.if` -and `affine.for` instructions) the compiler can follow use-def chains (e.g. +and `for` instructions) the compiler can follow use-def chains (e.g. through [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to precisely analyze references at compile-time using polyhedral techniques. This is possible because of the @@ -1492,7 +1492,7 @@ store %100, %A[%1, 1023] : memref<4x?xf32, #layout, hbm> **Context:** The `load` and `store` instructions are specifically crafted to fully resolve a reference to an element of a memref, and (in polyhedral -`affine.if` and `affine.for` instructions) the compiler can follow use-def +`affine.if` and `for` instructions) the compiler can follow use-def chains (e.g. through [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to precisely analyze references at compile-time using polyhedral techniques. This diff --git a/mlir/g3doc/Passes.md b/mlir/g3doc/Passes.md index bb15cec22a4..dc46b97f7b1 100644 --- a/mlir/g3doc/Passes.md +++ b/mlir/g3doc/Passes.md @@ -39,7 +39,7 @@ These restrictions may be lifted in the future. ### Output IR -Functions with `affine.for` and `affine.if` instructions eliminated. These +Functions with `for` and `affine.if` instructions eliminated. These functions may contain operations from the Standard dialect in addition to those already present before the pass. 
diff --git a/mlir/g3doc/Rationale.md b/mlir/g3doc/Rationale.md index 8b22e93598c..949f405d5f6 100644 --- a/mlir/g3doc/Rationale.md +++ b/mlir/g3doc/Rationale.md @@ -150,8 +150,8 @@ func bar(%A : memref<8x?xf32, #lmap>) { // dynamically using dim instruction. %N = dim %A, 1 : memref<8x?xf32, #lmap> - affine.for %i = 0 to 8 { - affine.for %j = 0 to %N { + for %i = 0 to 8 { + for %j = 0 to %N { // A[i,j] += 1 %s1 = load %A [%i, %j] : memref<8x?xf32, #lmap> %s2 = add %s1, 1 @@ -534,7 +534,7 @@ nested in an outer function that using affine loops. func @search(memref<?x?xi32 %A, <?xi32> %S, i32 %key) { %ni = dim %A, 0 : memref<?x?xi32> // This loop can be parallelized - affine.for %i = 0 to %ni { + for %i = 0 to %ni { call @search_body (%A, %S, %i) : (memref<?x?xi32>, memref<?xi32>, i32) } return @@ -568,7 +568,7 @@ func @search_body(%A: memref<?x?xi32>, %S: memref<?xi32>, %key: i32) { As per the [MLIR spec](LangRef.md), the restrictions on dimensions and symbol identifiers to be used with the affine.apply instruction only apply to accesses -inside `affine.for` and `affine.if` instructions. However, an analysis of +inside `for` and `affine.if` instructions. However, an analysis of accesses inside the called function (`@search_body`) is necessary to determine if the `%i` loop could be parallelized: such function access analysis is calling context sensitive. @@ -590,8 +590,8 @@ for (i=0; i <N; i++) ```mlir {.mlir} func @outer_nest(%n) : (i32) { - affine.for %i = 0 to %n { - affine.for %j = 0 to %n { + for %i = 0 to %n { + for %j = 0 to %n { call @inner_nest(%i, %j, %n) } } @@ -606,8 +606,8 @@ func @inner_nest(%i: i32, %j: i32, %n: i32) { } func @inner_nest2(%m, %n) -> i32 { - affine.for %k = 0 to %m { - affine.for %l = 0 to %n { + for %k = 0 to %m { + for %l = 0 to %n { ... } } @@ -649,13 +649,13 @@ in a dilated convolution. func @conv2d(memref<16x1024x1024x3xf32, #lm0, vmem> %input, memref<5x5x3x32xf32, #lm0, vmem> %kernel, memref<16x512x512x32xf32, #lm0, vmem> %output) { - affine.for %b = 0 to %batch { - affine.for %oh = 0 to %output_height { - affine.for %ow = 0 to %output_width { - affine.for %of = 0 to %output_feature { - affine.for %kh = 0 to %kernel_height { - affine.for %kw = 0 to %kernel_width { - affine.for %if = 0 to %input_feature { + for %b = 0 to %batch { + for %oh = 0 to %output_height { + for %ow = 0 to %output_width { + for %of = 0 to %output_feature { + for %kh = 0 to %kernel_height { + for %kw = 0 to %kernel_width { + for %if = 0 to %input_feature { // Calculate input indices. %1_0 = affine.apply #map1_0 (%0#1, %0#2, %0#4, %0#5) [%h_stride, %w_stride, %h_kernel_dilation, %w_kernel_dilation, @@ -899,10 +899,10 @@ func @dma_hbm_to_vmem(memref<1024 x f32, #layout_map0, hbm> %a, representation. 2(b) requires no change, but impacts how cost models look at index and layout maps. -### `affine.if` and `affine.for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"} +### `affine.if` and `for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"} We considered providing a representation for SSA values that are live out of -`affine.if/else` conditional bodies and loop carried in `affine.for` loops. We +`affine.if/else` conditional bodies and loop carried in `for` loops. We ultimately abandoned this approach due to its complexity. In the current design of MLIR, scalar variables cannot escape for loops or if instructions. 
In situations, where escaping is necessary, we use zero-dimensional tensors and @@ -919,7 +919,7 @@ Syntax: ``` {.ebnf} [<out-var-list> =] -affine.for %<index-variable-name> = <lower-bound> ... <upper-bound> step <step> +for %<index-variable-name> = <lower-bound> ... <upper-bound> step <step> [with <in-var-list>] { <loop-instruction-list> } ``` @@ -934,7 +934,7 @@ Example: // Return sum of elements in 1-dimensional mref A func int32 @sum(%A : memref<?xi32>, %N : i32) -> (i32) { %init = 0 - %result = affine.for %i = 0 to N with %tmp(%init) { + %result = for %i = 0 to N with %tmp(%init) { %value = load %A[%i] %sum = %value + %tmp yield %sum @@ -964,7 +964,7 @@ Example: // Compute sum of half of the array func int32 @sum_half(%A, %N) { %s0 = 0 - %s1 = affine.for %i = 1 ... N step 1 with %s2 (%s0) { + %s1 = for %i = 1 ... N step 1 with %s2 (%s0) { %s3 = affine.if (%i >= %N / 2) { %v0 = load %A[%i] %s4 = %s2 + %v0 diff --git a/mlir/g3doc/RationaleSimplifiedPolyhedralForm.md b/mlir/g3doc/RationaleSimplifiedPolyhedralForm.md index a1830f0b4ab..6fe05a4d8c4 100644 --- a/mlir/g3doc/RationaleSimplifiedPolyhedralForm.md +++ b/mlir/g3doc/RationaleSimplifiedPolyhedralForm.md @@ -184,8 +184,8 @@ Our simple example above would be represented as: ```mlir mlfunc @simple_example(... %N) { - affine.for %i = 0 ... %N step 1 { - affine.for %j = 0 ... %N step 1 { + for %i = 0 ... %N step 1 { + for %j = 0 ... %N step 1 { // identity noop in this case, but can exist in general. %0,%1 = affine.apply #57(%i, %j) @@ -203,8 +203,8 @@ The example with the reduced domain would be represented with an if instruction: ```mlir mlfunc @reduced_domain_example(... %N) { - affine.for %i = 0 ... %N step 1 { - affine.for %j = 0 ... %N step 1 { + for %i = 0 ... %N step 1 { + for %j = 0 ... %N step 1 { // identity noop in this case, but can exist in general. %0,%1 = affinecall #57(%i, %j) @@ -233,8 +233,8 @@ that transformations call into): ```mlir mlfunc @skewed_domain_example(... %N) { - affine.for %t1 = 0 ... 2*N-2 step 1 { - affine.for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 { + for %t1 = 0 ... 2*N-2 step 1 { + for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 { (%i, %j) = (%t1-%t2, %t2) ... } @@ -373,7 +373,7 @@ mlfunc's (if we support them) will also have to have domains. ### Lack of redundancy in IR The traditional form has multiple encodings for the same sorts of behavior: you -end up having bits on `affine.for` loops to specify whether codegen should use +end up having bits on `for` loops to specify whether codegen should use "atomic/separate" policies, unroll loops, etc. Instructions can be split or can generate multiple copies of their instruction because of overlapping domains, etc. diff --git a/mlir/include/mlir/AffineOps/AffineOps.h b/mlir/include/mlir/AffineOps/AffineOps.h index b7a5c0c8326..caa7e16cda8 100644 --- a/mlir/include/mlir/AffineOps/AffineOps.h +++ b/mlir/include/mlir/AffineOps/AffineOps.h @@ -90,15 +90,15 @@ private: explicit AffineApplyOp(const Instruction *state) : Op(state) {} }; -/// The "affine.for" instruction represents an affine loop nest, defining an SSA -/// value for its induction variable. The induction variable is represented as a +/// The "for" instruction represents an affine loop nest, defining an SSA value +/// for its induction variable. The induction variable is represented as a /// BlockArgument to the entry block of the body. The body and induction -/// variable can be created automatically for new "affine.for" ops with -/// 'createBody'. 
This SSA value always has type index, which is the size of the -/// machine word. The stride, represented by step, is a positive constant -/// integer which defaults to "1" if not present. The lower and upper bounds -/// specify a half-open range: the range includes the lower bound but does not -/// include the upper bound. +/// variable can be created automatically for new "for" ops with 'createBody'. +/// This SSA value always has type index, which is the size of the machine word. +/// The stride, represented by step, is a positive constant integer which +/// defaults to "1" if not present. The lower and upper bounds specify a +/// half-open range: the range includes the lower bound but does not include the +/// upper bound. /// /// The lower and upper bounds of a for operation are represented as an /// application of an affine mapping to a list of SSA values passed to the map. @@ -110,7 +110,7 @@ private: /// /// Example: /// -/// affine.for %i = 1 to 10 { +/// for %i = 1 to 10 { /// ... /// } /// @@ -131,7 +131,7 @@ public: static void getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context); - static StringRef getOperationName() { return "affine.for"; } + static StringRef getOperationName() { return "for"; } static StringRef getStepAttrName() { return "step"; } static StringRef getLowerBoundAttrName() { return "lower_bound"; } static StringRef getUpperBoundAttrName() { return "upper_bound"; } @@ -253,15 +253,15 @@ ConstOpPointer<AffineForOp> getForInductionVarOwner(const Value *val); void extractForInductionVars(ArrayRef<OpPointer<AffineForOp>> forInsts, SmallVectorImpl<Value *> *ivs); -/// Adds constraints (lower and upper bounds) for the specified 'affine.for' +/// Adds constraints (lower and upper bounds) for the specified 'for' /// instruction's Value using IR information stored in its bound maps. The /// right identifier is first looked up using forOp's Value. Returns /// false for the yet unimplemented/unsupported cases, and true if the /// information is successfully added. Asserts if the Value corresponding to -/// the 'affine.for' instruction isn't found in the constraint system. Any new -/// identifiers that are found in the bound operands of the 'affine.for' -/// instruction are added as trailing identifiers (either dimensional or -/// symbolic depending on whether the operand is a valid ML Function symbol). +/// the 'for' instruction isn't found in the constraint system. Any new +/// identifiers that are found in the bound operands of the 'for' instruction +/// are added as trailing identifiers (either dimensional or symbolic +/// depending on whether the operand is a valid ML Function symbol). // TODO(bondhugula): add support for non-unit strides. bool addAffineForOpDomain(ConstOpPointer<AffineForOp> forOp, FlatAffineConstraints *constraints); @@ -297,10 +297,10 @@ public: operand_range getOperands() const { return {operand_begin(), operand_end()}; } private: - // 'affine.for' instruction that contains this bound. + // 'for' instruction that contains this bound. ConstOpPointer<AffineForOp> inst; // Start and end positions of this affine bound operands in the list of - // the containing 'affine.for' instruction operands. + // the containing 'for' instruction operands. unsigned opStart, opEnd; // Affine map for this bound. 
AffineMap map; diff --git a/mlir/include/mlir/Analysis/Utils.h b/mlir/include/mlir/Analysis/Utils.h index 4e01bc962ed..ee72ac26a0d 100644 --- a/mlir/include/mlir/Analysis/Utils.h +++ b/mlir/include/mlir/Analysis/Utils.h @@ -52,7 +52,7 @@ bool dominates(const Instruction &a, const Instruction &b); bool properlyDominates(const Instruction &a, const Instruction &b); /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from -/// the outermost 'affine.for' instruction to the innermost one. +/// the outermost 'for' instruction to the innermost one. // TODO(bondhugula): handle 'affine.if' inst's. void getLoopIVs(const Instruction &inst, SmallVectorImpl<OpPointer<AffineForOp>> *loops); @@ -105,8 +105,8 @@ insertBackwardComputationSlice(Instruction *srcOpInst, Instruction *dstOpInst, /// surrounding such op's. // For example, the memref region for a load operation at loop depth = 1: // -// affine.for %i = 0 to 32 { -// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { +// for %i = 0 to 32 { +// for %ii = %i to (d0) -> (d0 + 8) (%i) { // load %A[%ii] // } // } @@ -139,8 +139,8 @@ struct MemRefRegion { /// For example, the memref region for this operation at loopDepth = 1 will /// be: /// - /// affine.for %i = 0 to 32 { - /// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { + /// for %i = 0 to 32 { + /// for %ii = %i to (d0) -> (d0 + 8) (%i) { /// load %A[%ii] /// } /// } diff --git a/mlir/include/mlir/Analysis/VectorAnalysis.h b/mlir/include/mlir/Analysis/VectorAnalysis.h index b3196e14097..4982481bf6c 100644 --- a/mlir/include/mlir/Analysis/VectorAnalysis.h +++ b/mlir/include/mlir/Analysis/VectorAnalysis.h @@ -76,9 +76,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType); /// The following MLIR snippet: /// /// ```mlir -/// affine.for %i3 = 0 to %0 { -/// affine.for %i4 = 0 to %1 { -/// affine.for %i5 = 0 to %2 { +/// for %i3 = 0 to %0 { +/// for %i4 = 0 to %1 { +/// for %i5 = 0 to %2 { /// %a5 = load %arg0[%i4, %i5, %i3] : memref<?x?x?xf32> /// }}} /// ``` @@ -86,9 +86,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType); /// may vectorize with {permutation_map: (d0, d1, d2) -> (d2, d1)} into: /// /// ```mlir -/// affine.for %i3 = 0 to %0 step 32 { -/// affine.for %i4 = 0 to %1 { -/// affine.for %i5 = 0 to %2 step 256 { +/// for %i3 = 0 to %0 step 32 { +/// for %i4 = 0 to %1 { +/// for %i5 = 0 to %2 step 256 { /// %4 = vector_transfer_read %arg0, %i4, %i5, %i3 /// {permutation_map: (d0, d1, d2) -> (d2, d1)} : /// (memref<?x?x?xf32>, index, index) -> vector<32x256xf32> @@ -103,7 +103,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType); /// /// ```mlir /// %cst0 = constant 0 : index -/// affine.for %i0 = 0 to %0 { +/// for %i0 = 0 to %0 { /// %a0 = load %arg0[%cst0, %cst0] : memref<?x?xf32> /// } /// ``` @@ -111,7 +111,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType); /// may vectorize with {permutation_map: (d0) -> (0)} into: /// /// ```mlir -/// affine.for %i0 = 0 to %0 step 128 { +/// for %i0 = 0 to %0 step 128 { /// %3 = vector_transfer_read %arg0, %c0_0, %c0_0 /// {permutation_map: (d0, d1) -> (0)} : /// (memref<?x?xf32>, index, index) -> vector<128xf32> diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h index d543b520565..f3d9b9fe9fd 100644 --- a/mlir/include/mlir/Transforms/LoopUtils.h +++ b/mlir/include/mlir/Transforms/LoopUtils.h @@ -83,10 +83,9 @@ AffineMap getUnrolledLoopUpperBound(ConstOpPointer<AffineForOp> forOp, unsigned unrollFactor, FuncBuilder 
*builder); -/// Skew the instructions in the body of a 'affine.for' instruction with the -/// specified instruction-wise shifts. The shifts are with respect to the -/// original execution order, and are multiplied by the loop 'step' before being -/// applied. +/// Skew the instructions in the body of a 'for' instruction with the specified +/// instruction-wise shifts. The shifts are with respect to the original +/// execution order, and are multiplied by the loop 'step' before being applied. UtilResult instBodySkew(OpPointer<AffineForOp> forOp, ArrayRef<uint64_t> shifts, bool unrollPrologueEpilogue = false); diff --git a/mlir/include/mlir/Transforms/Utils.h b/mlir/include/mlir/Transforms/Utils.h index eb7f725576a..3b828db6ae9 100644 --- a/mlir/include/mlir/Transforms/Utils.h +++ b/mlir/include/mlir/Transforms/Utils.h @@ -94,14 +94,14 @@ Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc, /// /// Before /// -/// affine.for %i = 0 to #map(%N) +/// for %i = 0 to #map(%N) /// %idx = affine.apply (d0) -> (d0 mod 2) (%i) /// send %A[%idx], ... /// %v = "compute"(%idx, ...) /// /// After /// -/// affine.for %i = 0 to #map(%N) +/// for %i = 0 to #map(%N) /// %idx = affine.apply (d0) -> (d0 mod 2) (%i) /// send %A[%idx], ... /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i) diff --git a/mlir/lib/AffineOps/AffineOps.cpp b/mlir/lib/AffineOps/AffineOps.cpp index 858b8bd791d..9da155f09d8 100644 --- a/mlir/lib/AffineOps/AffineOps.cpp +++ b/mlir/lib/AffineOps/AffineOps.cpp @@ -35,7 +35,7 @@ using llvm::dbgs; //===----------------------------------------------------------------------===// AffineOpsDialect::AffineOpsDialect(MLIRContext *context) - : Dialect(/*namePrefix=*/"affine", context) { + : Dialect(/*namePrefix=*/"", context) { addOperations<AffineApplyOp, AffineForOp, AffineIfOp>(); } @@ -716,7 +716,7 @@ static void printBound(AffineBound bound, const char *prefix, OpAsmPrinter *p) { } void AffineForOp::print(OpAsmPrinter *p) const { - *p << "affine.for "; + *p << "for "; p->printOperand(getBody()->getArgument(0)); *p << " = "; printBound(getLowerBound(), "max", p); diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp index 3a086ba512d..9d2ea691bdd 100644 --- a/mlir/lib/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Analysis/AffineAnalysis.cpp @@ -756,8 +756,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const { // For example, given the following MLIR code with with "source" and // "destination" accesses to the same memref labled, and symbols %M, %N, %K: // -// affine.for %i0 = 0 to 100 { -// affine.for %i1 = 0 to 50 { +// for %i0 = 0 to 100 { +// for %i1 = 0 to 50 { // %a0 = affine.apply // (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N] // // Source memref access. @@ -765,8 +765,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const { // } // } // -// affine.for %i2 = 0 to 100 { -// affine.for %i3 = 0 to 50 { +// for %i2 = 0 to 100 { +// for %i3 = 0 to 50 { // %a1 = affine.apply // (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M] // // Destination memref access. diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp index 823fbbe9fcd..0499e866fe8 100644 --- a/mlir/lib/Analysis/Utils.cpp +++ b/mlir/lib/Analysis/Utils.cpp @@ -36,12 +36,12 @@ using namespace mlir; /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from -/// the outermost 'affine.for' instruction to the innermost one. 
+/// the outermost 'for' instruction to the innermost one. void mlir::getLoopIVs(const Instruction &inst, SmallVectorImpl<OpPointer<AffineForOp>> *loops) { auto *currInst = inst.getParentInst(); OpPointer<AffineForOp> currAffineForOp; - // Traverse up the hierarchy collecing all 'affine.for' instruction while + // Traverse up the hierarchy collecing all 'for' instruction while // skipping over 'affine.if' instructions. while (currInst && ((currAffineForOp = currInst->dyn_cast<AffineForOp>()) || currInst->isa<AffineIfOp>())) { @@ -111,8 +111,8 @@ bool MemRefRegion::unionBoundingBox(const MemRefRegion &other) { // For example, the memref region for this load operation at loopDepth = 1 will // be as below: // -// affine.for %i = 0 to 32 { -// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { +// for %i = 0 to 32 { +// for %ii = %i to (d0) -> (d0 + 8) (%i) { // load %A[%ii] // } // } @@ -614,7 +614,7 @@ Optional<int64_t> mlir::getMemoryFootprintBytes(const Block &block, int memorySpace) { std::vector<std::unique_ptr<MemRefRegion>> regions; - // Walk this 'affine.for' instruction to gather all memory regions. + // Walk this 'for' instruction to gather all memory regions. bool error = false; const_cast<Block *>(&block)->walk([&](Instruction *opInst) { if (!opInst->isa<LoadOp>() && !opInst->isa<StoreOp>()) { diff --git a/mlir/lib/IR/Block.cpp b/mlir/lib/IR/Block.cpp index 90bc0b76efc..96582032b2b 100644 --- a/mlir/lib/IR/Block.cpp +++ b/mlir/lib/IR/Block.cpp @@ -189,7 +189,7 @@ unsigned Block::getNumSuccessors() const { return terminator->getNumSuccessors(); } assert(getParent() && "top-level block with no terminator"); - // Blocks inside 'affine.for'/'affine.if' instructions don't have successors. + // Blocks inside 'for'/'affine.if' instructions don't have successors. return 0; } diff --git a/mlir/lib/Transforms/DmaGeneration.cpp b/mlir/lib/Transforms/DmaGeneration.cpp index dcb4828d0bf..bda98f46b61 100644 --- a/mlir/lib/Transforms/DmaGeneration.cpp +++ b/mlir/lib/Transforms/DmaGeneration.cpp @@ -338,7 +338,7 @@ bool DmaGeneration::generateDma(const MemRefRegion ®ion, Block *block, auto fastMemRefType = top.getMemRefType( fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace); - // Create the fast memory space buffer just before the 'affine.for' + // Create the fast memory space buffer just before the 'for' // instruction. fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType)->getResult(); // Record it. @@ -457,7 +457,7 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) { // approach is conservative in some cases at the moment, we do a check later // and report an error with location info. // TODO(bondhugula): An 'affine.if' instruction is being treated similar to an - // operation instruction. 'affine.if''s could have 'affine.for's in them; + // operation instruction. 'affine.if''s could have 'for's in them; // treat them separately. // Get to the first load, store, or for op. @@ -471,7 +471,7 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) { if (auto forOp = it->dyn_cast<AffineForOp>()) { // We'll assume for now that loops with steps are tiled loops, and so DMAs // are not performed for that depth, but only further inside. - // If the memory footprint of the 'affine.for' loop is higher than fast + // If the memory footprint of the 'for' loop is higher than fast // memory capacity (when provided), we recurse to DMA at an inner level // until we find a depth at which footprint fits in the capacity. 
If the // footprint can't be calcuated, we assume for now it fits. @@ -490,11 +490,11 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) { consumedCapacityBytes += runOnBlock(/*begin=*/curBegin, /*end=*/it); // Recurse onto the body of this loop. runOnBlock(forOp->getBody(), consumedCapacityBytes); - // The next region starts right after the 'affine.for' instruction. + // The next region starts right after the 'for' instruction. curBegin = std::next(it); } else { // We have enough capacity, i.e., DMAs will be computed for the portion - // of the block until 'it', and for the 'affine.for' loop. For the + // of the block until 'it', and for the 'for' loop. For the // latter, they are placed just before this loop (for incoming DMAs) and // right after (for outgoing ones). consumedCapacityBytes += runOnBlock(/*begin=*/curBegin, /*end=*/it); diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp index 9e96b0800b3..8d5f51059bf 100644 --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -510,8 +510,7 @@ bool MemRefDependenceGraph::init(Function *f) { // all loads and store accesses it contains. LoopNestStateCollector collector; collector.collect(&inst); - // Return false if a non 'affine.for' region was found (not currently - // supported). + // Return false if a non 'for' region was found (not currently supported). if (collector.hasNonForRegion) return false; Node node(nextNodeId++, &inst); diff --git a/mlir/lib/Transforms/LoopTiling.cpp b/mlir/lib/Transforms/LoopTiling.cpp index f00c2e767e6..368a1dac1df 100644 --- a/mlir/lib/Transforms/LoopTiling.cpp +++ b/mlir/lib/Transforms/LoopTiling.cpp @@ -231,8 +231,7 @@ UtilResult mlir::tileCodeGen(MutableArrayRef<OpPointer<AffineForOp>> band, static void getTileableBands(Function *f, std::vector<SmallVector<OpPointer<AffineForOp>, 6>> *bands) { - // Get maximal perfect nest of 'affine.for' insts starting from root - // (inclusive). + // Get maximal perfect nest of 'for' insts starting from root (inclusive). auto getMaximalPerfectLoopNest = [&](OpPointer<AffineForOp> root) { SmallVector<OpPointer<AffineForOp>, 6> band; OpPointer<AffineForOp> currInst = root; diff --git a/mlir/lib/Transforms/LoopUnroll.cpp b/mlir/lib/Transforms/LoopUnroll.cpp index 025a86891df..3a7cfb85e08 100644 --- a/mlir/lib/Transforms/LoopUnroll.cpp +++ b/mlir/lib/Transforms/LoopUnroll.cpp @@ -164,7 +164,7 @@ PassResult LoopUnroll::runOnFunction(Function *f) { return success(); } -/// Unrolls a 'affine.for' inst. Returns true if the loop was unrolled, false +/// Unrolls a 'for' inst. Returns true if the loop was unrolled, false /// otherwise. The default unroll factor is 4. bool LoopUnroll::runOnAffineForOp(OpPointer<AffineForOp> forOp) { // Use the function callback if one was provided. diff --git a/mlir/lib/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Transforms/LoopUnrollAndJam.cpp index 2f0249824dd..b2aed7d9d7f 100644 --- a/mlir/lib/Transforms/LoopUnrollAndJam.cpp +++ b/mlir/lib/Transforms/LoopUnrollAndJam.cpp @@ -105,7 +105,7 @@ PassResult LoopUnrollAndJam::runOnFunction(Function *f) { return success(); } -/// Unroll and jam a 'affine.for' inst. Default unroll jam factor is +/// Unroll and jam a 'for' inst. Default unroll jam factor is /// kDefaultUnrollJamFactor. Return false if nothing was done. bool LoopUnrollAndJam::runOnAffineForOp(OpPointer<AffineForOp> forOp) { // Unroll and jam by the factor that was passed if any. 
diff --git a/mlir/lib/Transforms/LowerAffine.cpp b/mlir/lib/Transforms/LowerAffine.cpp index 5ce8a6258f4..ef6ff420912 100644 --- a/mlir/lib/Transforms/LowerAffine.cpp +++ b/mlir/lib/Transforms/LowerAffine.cpp @@ -283,8 +283,7 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate, return value; } -// Convert a "affine.for" loop to a flow of blocks. Return `false` on -// success. +// Convert a "for" loop to a flow of blocks. Return `false` on success. // // Create an SESE region for the loop (including its body) and append it to the // end of the current region. The loop region consists of the initialization @@ -331,9 +330,8 @@ bool LowerAffinePass::lowerAffineFor(OpPointer<AffineForOp> forOp) { auto loc = forOp->getLoc(); auto *forInst = forOp->getInstruction(); - // Start by splitting the block containing the 'affine.for' into two parts. - // The part before will get the init code, the part after will be the end - // point. + // Start by splitting the block containing the 'for' into two parts. The part + // before will get the init code, the part after will be the end point. auto *initBlock = forInst->getBlock(); auto *endBlock = initBlock->splitBlock(forInst); diff --git a/mlir/lib/Transforms/LowerVectorTransfers.cpp b/mlir/lib/Transforms/LowerVectorTransfers.cpp index e63d3c8111c..63fb45db9c5 100644 --- a/mlir/lib/Transforms/LowerVectorTransfers.cpp +++ b/mlir/lib/Transforms/LowerVectorTransfers.cpp @@ -126,9 +126,9 @@ private: /// // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into /// // vector<32x256xf32> and pad with %f0 to handle the boundary case: /// %f0 = constant 0.0f : f32 -/// affine.for %i0 = 0 to %0 { -/// affine.for %i1 = 0 to %1 step 256 { -/// affine.for %i2 = 0 to %2 step 32 { +/// for %i0 = 0 to %0 { +/// for %i1 = 0 to %1 step 256 { +/// for %i2 = 0 to %2 step 32 { /// %v = vector_transfer_read %A, %i0, %i1, %i2, %f0 /// {permutation_map: (d0, d1, d2) -> (d2, d1)} : /// (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32> @@ -139,8 +139,8 @@ private: /// MLIR resembling: /// /// ```mlir -/// affine.for %d1 = 0 to 256 { -/// affine.for %d2 = 0 to 32 { +/// for %d1 = 0 to 256 { +/// for %d2 = 0 to 32 { /// %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32 /// %tmp[%d2, %d1] = %s /// } diff --git a/mlir/lib/Transforms/MaterializeVectors.cpp b/mlir/lib/Transforms/MaterializeVectors.cpp index 4434ab5322e..be5a03bc416 100644 --- a/mlir/lib/Transforms/MaterializeVectors.cpp +++ b/mlir/lib/Transforms/MaterializeVectors.cpp @@ -101,10 +101,10 @@ /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) { /// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0> /// %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : -/// vector<4x4x4xf32> affine.for %i0 = 0 to %M step 4 { -/// affine.for %i1 = 0 to %N step 4 { -/// affine.for %i2 = 0 to %O { -/// affine.for %i3 = 0 to %P step 4 { +/// vector<4x4x4xf32> for %i0 = 0 to %M step 4 { +/// for %i1 = 0 to %N step 4 { +/// for %i2 = 0 to %O { +/// for %i3 = 0 to %P step 4 { /// vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 /// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : /// vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>, @@ -120,10 +120,10 @@ /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) { /// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0> /// %f1 = constant splat<vector<4x4xf32>, 1.000000e+00> : vector<4x4x4xf32> -/// affine.for %i0 = 0 to %arg0 step 4 { -/// affine.for %i1 = 0 to %arg1 step 4 { -/// affine.for %i2 = 0 to %arg2 { -/// affine.for 
%i3 = 0 to %arg3 step 4 { +/// for %i0 = 0 to %arg0 step 4 { +/// for %i1 = 0 to %arg1 step 4 { +/// for %i2 = 0 to %arg2 { +/// for %i3 = 0 to %arg3 step 4 { /// %1 = affine.apply (d0, d1, d2, d3) -> (d0, d1, d2, d3) /// (%i0, %i1, %i2, %i3) /// vector_transfer_write f1, %0, %1#0, %1#1, %1#2, %1#3 @@ -293,10 +293,10 @@ static Value *substitute(Value *v, VectorType hwVectorType, /// super-vectorization has been applied: /// /// ```mlir -/// affine.for %i0 = 0 to %M { -/// affine.for %i1 = 0 to %N step 3 { -/// affine.for %i2 = 0 to %O { -/// affine.for %i3 = 0 to %P step 32 { +/// for %i0 = 0 to %M { +/// for %i1 = 0 to %N step 3 { +/// for %i2 = 0 to %O { +/// for %i3 = 0 to %P step 32 { /// %r = vector_transfer_read(%A, map(%i..)#0, map(%i..)#1, map(%i..)#2) /// -> vector<3x32xf32> /// ... diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp index 91a17764358..ad9801fea89 100644 --- a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp +++ b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp @@ -19,7 +19,7 @@ // potentially getting rid of intermediate memref's entirely. // TODO(mlir-team): In the future, similar techniques could be used to eliminate // dead memref store's and perform more complex forwarding when support for -// SSA scalars live out of 'affine.for'/'affine.if' statements is available. +// SSA scalars live out of 'for'/'affine.if' statements is available. //===----------------------------------------------------------------------===// #include "mlir/Analysis/AffineAnalysis.h" @@ -55,7 +55,7 @@ namespace { // // (* A dependence being satisfied at a block: a dependence that is satisfied by // virtue of the destination instruction appearing textually / lexically after -// the source instruction within the body of a 'affine.for' instruction; thus, a +// the source instruction within the body of a 'for' instruction; thus, a // dependence is always either satisfied by a loop or by a block). // // The above conditions are simple to check, sufficient, and powerful for most @@ -145,8 +145,8 @@ void MemRefDataFlowOpt::forwardStoreToLoad(OpPointer<LoadOp> loadOp) { // Check if this store is a candidate for forwarding; we only forward if // the dependence from the store is carried by the *body* of innermost // common surrounding loop. As an example this filters out cases like: - // affine.for %i0 - // affine.for %i1 + // for %i0 + // for %i1 // %idx = affine.apply (d0) -> (d0 + 1) (%i0) // store %A[%idx] // load %A[%i0] diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp index 84c8cd830dc..cfa045f2279 100644 --- a/mlir/lib/Transforms/PipelineDataTransfer.cpp +++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp @@ -71,11 +71,11 @@ static unsigned getTagMemRefPos(const Instruction &dmaInst) { return 0; } -/// Doubles the buffer of the supplied memref on the specified 'affine.for' -/// instruction by adding a leading dimension of size two to the memref. -/// Replaces all uses of the old memref by the new one while indexing the newly -/// added dimension by the loop IV of the specified 'affine.for' instruction -/// modulo 2. Returns false if such a replacement cannot be performed. +/// Doubles the buffer of the supplied memref on the specified 'for' instruction +/// by adding a leading dimension of size two to the memref. Replaces all uses +/// of the old memref by the new one while indexing the newly added dimension by +/// the loop IV of the specified 'for' instruction modulo 2. 
Returns false if +/// such a replacement cannot be performed. static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) { auto *forBody = forOp->getBody(); FuncBuilder bInner(forBody, forBody->begin()); @@ -108,7 +108,7 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) { dynamicDimCount++)); } - // Create and place the alloc right before the 'affine.for' instruction. + // Create and place the alloc right before the 'for' instruction. // TODO(mlir-team): we are assuming scoped allocation here, and aren't // inserting a dealloc -- this isn't the right thing. Value *newMemRef = @@ -137,9 +137,9 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) { /// Returns success if the IR is in a valid state. PassResult PipelineDataTransfer::runOnFunction(Function *f) { // Do a post order walk so that inner loop DMAs are processed first. This is - // necessary since 'affine.for' instructions nested within would otherwise - // become invalid (erased) when the outer loop is pipelined (the pipelined one - // gets deleted and replaced by a prologue, a new steady-state loop and an + // necessary since 'for' instructions nested within would otherwise become + // invalid (erased) when the outer loop is pipelined (the pipelined one gets + // deleted and replaced by a prologue, a new steady-state loop and an // epilogue). forOps.clear(); f->walkPostOrder<AffineForOp>( diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index 110949f43d5..a1903ace026 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -138,8 +138,8 @@ void mlir::promoteSingleIterationLoops(Function *f) { [](OpPointer<AffineForOp> forOp) { promoteIfSingleIteration(forOp); }); } -/// Generates a 'affine.for' inst with the specified lower and upper bounds -/// while generating the right IV remappings for the shifted instructions. The +/// Generates a 'for' inst with the specified lower and upper bounds while +/// generating the right IV remappings for the shifted instructions. The /// instruction blocks that go into the loop are specified in instGroupQueue /// starting from the specified offset, and in that order; the first element of /// the pair specifies the shift applied to that group of instructions; note @@ -194,10 +194,10 @@ generateLoop(AffineMap lbMap, AffineMap ubMap, return loopChunk; } -/// Skew the instructions in the body of a 'affine.for' instruction with the -/// specified instruction-wise shifts. The shifts are with respect to the -/// original execution order, and are multiplied by the loop 'step' before being -/// applied. A shift of zero for each instruction will lead to no change. +/// Skew the instructions in the body of a 'for' instruction with the specified +/// instruction-wise shifts. The shifts are with respect to the original +/// execution order, and are multiplied by the loop 'step' before being applied. +/// A shift of zero for each instruction will lead to no change. // The skewing of instructions with respect to one another can be used for // example to allow overlap of asynchronous operations (such as DMA // communication) with computation, or just relative shifting of instructions @@ -246,7 +246,7 @@ UtilResult mlir::instBodySkew(OpPointer<AffineForOp> forOp, // An array of instruction groups sorted by shift amount; each group has all // instructions with the same shift in the order in which they appear in the - // body of the 'affine.for' inst. 
+ // body of the 'for' inst. std::vector<std::vector<Instruction *>> sortedInstGroups(maxShift + 1); unsigned pos = 0; for (auto &inst : *forOp->getBody()) { diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp index 90d28bf34df..41689be52fc 100644 --- a/mlir/lib/Transforms/Utils/Utils.cpp +++ b/mlir/lib/Transforms/Utils/Utils.cpp @@ -194,14 +194,14 @@ bool mlir::replaceAllMemRefUsesWith(const Value *oldMemRef, Value *newMemRef, /// /// Before /// -/// affine.for %i = 0 to #map(%N) +/// for %i = 0 to #map(%N) /// %idx = affine.apply (d0) -> (d0 mod 2) (%i) /// "send"(%idx, %A, ...) /// "compute"(%idx) /// /// After /// -/// affine.for %i = 0 to #map(%N) +/// for %i = 0 to #map(%N) /// %idx = affine.apply (d0) -> (d0 mod 2) (%i) /// "send"(%idx, %A, ...) /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i) diff --git a/mlir/lib/Transforms/Vectorize.cpp b/mlir/lib/Transforms/Vectorize.cpp index 1f4c7b9fcc8..5a8d5d24661 100644 --- a/mlir/lib/Transforms/Vectorize.cpp +++ b/mlir/lib/Transforms/Vectorize.cpp @@ -113,7 +113,7 @@ using namespace mlir; /// /// At a high level, a vectorized load in a loop will resemble: /// ```mlir -/// affine.for %i = ? to ? step ? { +/// for %i = ? to ? step ? { /// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) -> /// vector<128xf32> /// } @@ -309,7 +309,7 @@ using namespace mlir; /// ```mlir /// mlfunc @fill(%A : memref<128xf32>) -> () { /// %f1 = constant 1.0 : f32 -/// affine.for %i0 = 0 to 32 { +/// for %i0 = 0 to 32 { /// store %f1, %A[%i0] : memref<128xf32, 0> /// } /// return @@ -322,7 +322,7 @@ using namespace mlir; /// is still subject to exploratory tradeoffs. In particular, say we want to /// vectorize by a factor 128, we want to transform the following input: /// ```mlir -/// affine.for %i = %M to %N { +/// for %i = %M to %N { /// %a = load A[%i] : memref<?xf32> /// } /// ``` @@ -331,8 +331,8 @@ using namespace mlir; /// memory promotion etc) say after stripmining (and potentially unrolling in /// the case of LLVM's SLP vectorizer): /// ```mlir -/// affine.for %i = floor(%M, 128) to ceil(%N, 128) { -/// affine.for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) { +/// for %i = floor(%M, 128) to ceil(%N, 128) { +/// for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) { /// %a = load A[%ii] : memref<?xf32> /// } /// } @@ -341,7 +341,7 @@ using namespace mlir; /// Instead, we seek to vectorize early and freeze vector types before /// scheduling, so we want to generate a pattern that resembles: /// ```mlir -/// affine.for %i = ? to ? step ? { +/// for %i = ? to ? step ? 
{ /// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) -> /// vector<128xf32> /// } @@ -362,7 +362,7 @@ using namespace mlir; /// For the simple strawman example above, vectorizing for a 1-D vector /// abstraction of size 128 returns code similar to: /// ```mlir -/// affine.for %i = %M to %N step 128 { +/// for %i = %M to %N step 128 { /// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) -> /// vector<128xf32> /// } @@ -391,20 +391,20 @@ using namespace mlir; /// %C = alloc (%M, %N) : memref<?x?xf32, 0> /// %f1 = constant 1.0 : f32 /// %f2 = constant 2.0 : f32 -/// affine.for %i0 = 0 to %M { -/// affine.for %i1 = 0 to %N { +/// for %i0 = 0 to %M { +/// for %i1 = 0 to %N { /// // non-scoped %f1 /// store %f1, %A[%i0, %i1] : memref<?x?xf32, 0> /// } /// } -/// affine.for %i2 = 0 to %M { -/// affine.for %i3 = 0 to %N { +/// for %i2 = 0 to %M { +/// for %i3 = 0 to %N { /// // non-scoped %f2 /// store %f2, %B[%i2, %i3] : memref<?x?xf32, 0> /// } /// } -/// affine.for %i4 = 0 to %M { -/// affine.for %i5 = 0 to %N { +/// for %i4 = 0 to %M { +/// for %i5 = 0 to %N { /// %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0> /// %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0> /// %s5 = addf %a5, %b5 : f32 @@ -438,24 +438,24 @@ using namespace mlir; /// %2 = alloc(%arg0, %arg1) : memref<?x?xf32> /// %cst = constant 1.0 : f32 /// %cst_0 = constant 2.0 : f32 -/// affine.for %i0 = 0 to %arg0 { -/// affine.for %i1 = 0 to %arg1 step 256 { +/// for %i0 = 0 to %arg0 { +/// for %i1 = 0 to %arg1 step 256 { /// %cst_1 = constant splat<vector<256xf32>, 1.0> : /// vector<256xf32> /// "vector_transfer_write"(%cst_1, %0, %i0, %i1) : /// (vector<256xf32>, memref<?x?xf32>, index, index) -> () /// } /// } -/// affine.for %i2 = 0 to %arg0 { -/// affine.for %i3 = 0 to %arg1 step 256 { +/// for %i2 = 0 to %arg0 { +/// for %i3 = 0 to %arg1 step 256 { /// %cst_2 = constant splat<vector<256xf32>, 2.0> : /// vector<256xf32> /// "vector_transfer_write"(%cst_2, %1, %i2, %i3) : /// (vector<256xf32>, memref<?x?xf32>, index, index) -> () /// } /// } -/// affine.for %i4 = 0 to %arg0 { -/// affine.for %i5 = 0 to %arg1 step 256 { +/// for %i4 = 0 to %arg0 { +/// for %i5 = 0 to %arg1 step 256 { /// %3 = "vector_transfer_read"(%0, %i4, %i5) : /// (memref<?x?xf32>, index, index) -> vector<256xf32> /// %4 = "vector_transfer_read"(%1, %i4, %i5) : @@ -494,24 +494,24 @@ using namespace mlir; /// %2 = alloc(%arg0, %arg1) : memref<?x?xf32> /// %cst = constant 1.0 : f32 /// %cst_0 = constant 2.0 : f32 -/// affine.for %i0 = 0 to %arg0 step 32 { -/// affine.for %i1 = 0 to %arg1 step 256 { +/// for %i0 = 0 to %arg0 step 32 { +/// for %i1 = 0 to %arg1 step 256 { /// %cst_1 = constant splat<vector<32x256xf32>, 1.0> : /// vector<32x256xf32> /// "vector_transfer_write"(%cst_1, %0, %i0, %i1) : /// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> () /// } /// } -/// affine.for %i2 = 0 to %arg0 step 32 { -/// affine.for %i3 = 0 to %arg1 step 256 { +/// for %i2 = 0 to %arg0 step 32 { +/// for %i3 = 0 to %arg1 step 256 { /// %cst_2 = constant splat<vector<32x256xf32>, 2.0> : /// vector<32x256xf32> /// "vector_transfer_write"(%cst_2, %1, %i2, %i3) : /// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> () /// } /// } -/// affine.for %i4 = 0 to %arg0 step 32 { -/// affine.for %i5 = 0 to %arg1 step 256 { +/// for %i4 = 0 to %arg0 step 32 { +/// for %i5 = 0 to %arg1 step 256 { /// %3 = "vector_transfer_read"(%0, %i4, %i5) : /// (memref<?x?xf32>, index, index) -> vector<32x256xf32> /// %4 = "vector_transfer_read"(%1, %i4, %i5) : 
diff --git a/mlir/test/AffineOps/canonicalize.mlir b/mlir/test/AffineOps/canonicalize.mlir index 163cfbe0985..ad6f39f3496 100644 --- a/mlir/test/AffineOps/canonicalize.mlir +++ b/mlir/test/AffineOps/canonicalize.mlir @@ -32,7 +32,7 @@ func @compose_affine_maps_1dto2d_no_symbols() { %0 = alloc() : memref<4x4xf32> - affine.for %i0 = 0 to 15 { + for %i0 = 0 to 15 { // Test load[%x, %x] %x0 = affine.apply (d0) -> (d0 - 1) (%i0) @@ -78,7 +78,7 @@ func @compose_affine_maps_1dto2d_no_symbols() { func @compose_affine_maps_1dto2d_with_symbols() { %0 = alloc() : memref<4x4xf32> - affine.for %i0 = 0 to 15 { + for %i0 = 0 to 15 { // Test load[%x0, %x0] with symbol %c4 %c4 = constant 4 : index %x0 = affine.apply (d0)[s0] -> (d0 - s0) (%i0)[%c4] @@ -119,13 +119,13 @@ func @compose_affine_maps_2d_tile() { %c4 = constant 4 : index %c8 = constant 8 : index - affine.for %i0 = 0 to 3 { + for %i0 = 0 to 3 { %x0 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i0)[%c4] - affine.for %i1 = 0 to 3 { + for %i1 = 0 to 3 { %x1 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i1)[%c8] - affine.for %i2 = 0 to 3 { + for %i2 = 0 to 3 { %x2 = affine.apply (d0)[s0] -> (d0 mod s0) (%i2)[%c4] - affine.for %i3 = 0 to 3 { + for %i3 = 0 to 3 { %x3 = affine.apply (d0)[s0] -> (d0 mod s0) (%i3)[%c8] %x40 = affine.apply (d0, d1, d2, d3)[s0, s1] -> @@ -151,9 +151,9 @@ func @compose_affine_maps_dependent_loads() { %0 = alloc() : memref<16x32xf32> %1 = alloc() : memref<16x32xf32> - affine.for %i0 = 0 to 3 { - affine.for %i1 = 0 to 3 { - affine.for %i2 = 0 to 3 { + for %i0 = 0 to 3 { + for %i1 = 0 to 3 { + for %i2 = 0 to 3 { %c3 = constant 3 : index %c7 = constant 7 : index @@ -197,7 +197,7 @@ func @compose_affine_maps_dependent_loads() { func @compose_affine_maps_diamond_dependency() { %0 = alloc() : memref<4x4xf32> - affine.for %i0 = 0 to 15 { + for %i0 = 0 to 15 { %a = affine.apply (d0) -> (d0 - 1) (%i0) %b = affine.apply (d0) -> (d0 + 7) (%a) %c = affine.apply (d0) -> (d0 * 4) (%a) @@ -217,8 +217,8 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) { %c9 = constant 9 : index %1 = alloc() : memref<100x100xf32, 1> %2 = alloc() : memref<1xi32> - affine.for %i0 = 0 to 100 { - affine.for %i1 = 0 to 100 { + for %i0 = 0 to 100 { + for %i1 = 0 to 100 { %3 = affine.apply (d0, d1)[s0, s1] -> (d1 + s0 + s1) (%i0, %i1)[%arg1, %c9] %4 = affine.apply (d0, d1, d3) -> (d3 - (d0 + d1)) @@ -238,7 +238,7 @@ func @trivial_maps() { %0 = alloc() : memref<10xf32> %c0 = constant 0 : index %cst = constant 0.000000e+00 : f32 - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %1 = affine.apply ()[s0] -> (s0)()[%c0] store %cst, %0[%1] : memref<10xf32> %2 = load %0[%c0] : memref<10xf32> @@ -277,20 +277,20 @@ func @constant_fold_bounds(%N : index) { %c3 = affine.apply (d0, d1) -> (d0 + d1) (%c1, %c2) %l = "foo"() : () -> index - // CHECK: affine.for %i0 = 5 to 7 { - affine.for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) { + // CHECK: for %i0 = 5 to 7 { + for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) { "foo"(%i, %c3) : (index, index) -> () } // Bound takes a non-constant argument but can still be folded. - // CHECK: affine.for %i1 = 1 to 7 { - affine.for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) { + // CHECK: for %i1 = 1 to 7 { + for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) { "foo"(%j, %c3) : (index, index) -> () } // None of the bounds can be folded. 
- // CHECK: affine.for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] { - affine.for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] { + // CHECK: for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] { + for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] { "foo"(%k, %c3) : (index, index) -> () } return diff --git a/mlir/test/IR/invalid.mlir b/mlir/test/IR/invalid.mlir index 99e0f682216..bd7e062063c 100644 --- a/mlir/test/IR/invalid.mlir +++ b/mlir/test/IR/invalid.mlir @@ -204,35 +204,35 @@ func @illegaltype(i0) // expected-error {{invalid integer width}} // ----- func @malformed_for_percent() { - affine.for i = 1 to 10 { // expected-error {{expected SSA operand}} + for i = 1 to 10 { // expected-error {{expected SSA operand}} // ----- func @malformed_for_equal() { - affine.for %i 1 to 10 { // expected-error {{expected '='}} + for %i 1 to 10 { // expected-error {{expected '='}} // ----- func @malformed_for_to() { - affine.for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}} + for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}} } } // ----- func @incomplete_for() { - affine.for %i = 1 to 10 step 2 + for %i = 1 to 10 step 2 } // expected-error {{expected '{' to begin block list}} // ----- func @nonconstant_step(%1 : i32) { - affine.for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}} + for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}} // ----- func @for_negative_stride() { - affine.for %i = 1 to 10 step -1 + for %i = 1 to 10 step -1 } // expected-error@-1 {{expected step to be representable as a positive signed integer}} // ----- @@ -244,7 +244,7 @@ func @non_instruction() { // ----- func @invalid_if_conditional2() { - affine.for %i = 1 to 10 { + for %i = 1 to 10 { affine.if (i)[N] : (i >= ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}} } } @@ -252,7 +252,7 @@ func @invalid_if_conditional2() { // ----- func @invalid_if_conditional3() { - affine.for %i = 1 to 10 { + for %i = 1 to 10 { affine.if (i)[N] : (i == 1) // expected-error {{expected '0' after '=='}} } } @@ -260,7 +260,7 @@ func @invalid_if_conditional3() { // ----- func @invalid_if_conditional4() { - affine.for %i = 1 to 10 { + for %i = 1 to 10 { affine.if (i)[N] : (i >= 2) // expected-error {{expected '0' after '>='}} } } @@ -268,7 +268,7 @@ func @invalid_if_conditional4() { // ----- func @invalid_if_conditional5() { - affine.for %i = 1 to 10 { + for %i = 1 to 10 { affine.if (i)[N] : (i <= 0 ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}} } } @@ -276,7 +276,7 @@ func @invalid_if_conditional5() { // ----- func @invalid_if_conditional6() { - affine.for %i = 1 to 10 { + for %i = 1 to 10 { affine.if (i) : (i) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}} } } @@ -284,7 +284,7 @@ func @invalid_if_conditional6() { // ----- // TODO (support affine.if (1)? 
func @invalid_if_conditional7() { - affine.for %i = 1 to 10 { + for %i = 1 to 10 { affine.if (i) : (1) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}} } } @@ -438,8 +438,8 @@ func @undef() { // ----- func @duplicate_induction_var() { - affine.for %i = 1 to 10 { // expected-error {{previously defined here}} - affine.for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}} + for %i = 1 to 10 { // expected-error {{previously defined here}} + for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}} } } return @@ -448,7 +448,7 @@ func @duplicate_induction_var() { // ----- func @dominance_failure() { - affine.for %i = 1 to 10 { + for %i = 1 to 10 { } "xxx"(%i) : (index)->() // expected-error {{operand #0 does not dominate this use}} return @@ -475,7 +475,7 @@ func @return_type_mismatch() -> i32 { // ----- func @return_inside_loop() -> i8 { - affine.for %i = 1 to 100 { + for %i = 1 to 100 { %a = "foo"() : ()->i8 return %a : i8 // expected-error@-1 {{'return' op may only be at the top level of a function}} @@ -521,7 +521,7 @@ func @referer() { #map1 = (i)[j] -> (i+j) func @bound_symbol_mismatch(%N : index) { - affine.for %i = #map1(%N) to 100 { + for %i = #map1(%N) to 100 { // expected-error@-1 {{symbol operand count and integer set symbol count must match}} } return @@ -532,7 +532,7 @@ func @bound_symbol_mismatch(%N : index) { #map1 = (i)[j] -> (i+j) func @bound_dim_mismatch(%N : index) { - affine.for %i = #map1(%N, %N)[%N] to 100 { + for %i = #map1(%N, %N)[%N] to 100 { // expected-error@-1 {{dim operand count and integer set dim count must match}} } return @@ -541,7 +541,7 @@ func @bound_dim_mismatch(%N : index) { // ----- func @large_bound() { - affine.for %i = 1 to 9223372036854775810 { + for %i = 1 to 9223372036854775810 { // expected-error@-1 {{integer constant out of range for attribute}} } return @@ -550,7 +550,7 @@ func @large_bound() { // ----- func @max_in_upper_bound(%N : index) { - affine.for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}} + for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}} } return } @@ -558,7 +558,7 @@ func @max_in_upper_bound(%N : index) { // ----- func @step_typo() { - affine.for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}} + for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}} } return } @@ -566,7 +566,7 @@ func @step_typo() { // ----- func @invalid_bound_map(%N : i32) { - affine.for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}} + for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}} } return } @@ -579,7 +579,7 @@ func @invalid_bound_map(%N : i32) { #set0 = (i)[N] : (i >= 0, N - i >= 0) func @invalid_if_operands1(%N : index) { - affine.for %i = 1 to 10 { + for %i = 1 to 10 { affine.if #set0(%i) { // expected-error@-1 {{symbol operand count and integer set symbol count must match}} @@ -587,7 +587,7 @@ func @invalid_if_operands1(%N : index) { #set0 = (i)[N] : (i >= 0, N - i >= 0) func @invalid_if_operands2(%N : index) { - affine.for %i = 1 to 10 { + for %i = 1 to 10 { affine.if #set0()[%N] { // expected-error@-1 {{dim operand count and integer set dim count must match}} @@ -595,7 +595,7 @@ func @invalid_if_operands2(%N : index) { #set0 = (i)[N] : (i >= 0, N - i >= 0) func @invalid_if_operands3(%N : index) { - affine.for %i = 1 to 10 { + for %i = 1 to 10 { affine.if #set0(%i)[%i] { // expected-error@-1 {{operand cannot be used as a symbol}} } 
@@ -736,11 +736,11 @@ func @f(f32) { // ----- func @f(%m : memref<?x?xf32>) { - affine.for %i0 = 0 to 42 { + for %i0 = 0 to 42 { // expected-error@+1 {{operand #2 does not dominate this use}} %x = load %m[%i0, %i1] : memref<?x?xf32> } - affine.for %i1 = 0 to 42 { + for %i1 = 0 to 42 { } return } @@ -790,7 +790,7 @@ func @type_alias_unknown(!unknown_alias) -> () { // expected-error {{undefined t // Check ill-formed opaque tensor. func @complex_loops() { - affine.for %i1 = 1 to 100 { + for %i1 = 1 to 100 { // expected-error @+1 {{expected '"' in string literal}} "opaqueIntTensor"(){bar: opaque<tensor<2x1x4xi32>, "0x686]>} : () -> () @@ -824,7 +824,7 @@ func @invalid_affine_structure() { func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { // expected-error @+1 {{lower loop bound affine map with multiple results requires 'max' prefix}} - affine.for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 { + for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 { } return } @@ -833,7 +833,7 @@ func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { func @missing_for_min(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { // expected-error @+1 {{upper loop bound affine map with multiple results requires 'min' prefix}} - affine.for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] { + for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] { } return } diff --git a/mlir/test/IR/locations.mlir b/mlir/test/IR/locations.mlir index ac4925e3e52..3b27301cfae 100644 --- a/mlir/test/IR/locations.mlir +++ b/mlir/test/IR/locations.mlir @@ -13,7 +13,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) { %2 = constant 4 : index loc(callsite("foo" at "mysource.cc":10:8)) // CHECK: } loc(fused["foo", "mysource.cc":10:8]) - affine.for %i0 = 0 to 8 { + for %i0 = 0 to 8 { } loc(fused["foo", "mysource.cc":10:8]) // CHECK: } loc(fused<"myPass">["foo", "foo2"]) diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir index a194c52344a..8fa3116a139 100644 --- a/mlir/test/IR/parser.mlir +++ b/mlir/test/IR/parser.mlir @@ -208,8 +208,8 @@ func @identity_functor(%a : () -> ()) -> (() -> ()) { func @func_ops_in_loop() { // CHECK: %0 = "foo"() : () -> i64 %a = "foo"() : ()->i64 - // CHECK: affine.for %i0 = 1 to 10 { - affine.for %i = 1 to 10 { + // CHECK: for %i0 = 1 to 10 { + for %i = 1 to 10 { // CHECK: %1 = "doo"() : () -> f32 %b = "doo"() : ()->f32 // CHECK: "bar"(%0, %1) : (i64, f32) -> () @@ -224,10 +224,10 @@ func @func_ops_in_loop() { // CHECK-LABEL: func @loops() { func @loops() { - // CHECK: affine.for %i0 = 1 to 100 step 2 { - affine.for %i = 1 to 100 step 2 { - // CHECK: affine.for %i1 = 1 to 200 { - affine.for %j = 1 to 200 { + // CHECK: for %i0 = 1 to 100 step 2 { + for %i = 1 to 100 step 2 { + // CHECK: for %i1 = 1 to 200 { + for %j = 1 to 200 { } // CHECK: } } // CHECK: } return // CHECK: return @@ -235,14 +235,14 @@ func @loops() { // CHECK-LABEL: func @complex_loops() { func @complex_loops() { - affine.for %i1 = 1 to 100 { // CHECK: affine.for %i0 = 1 to 100 { - affine.for %j1 = 1 to 100 { // CHECK: affine.for %i1 = 1 to 100 { + for %i1 = 1 to 100 { // CHECK: for %i0 = 1 to 100 { + for %j1 = 1 to 100 { // CHECK: for %i1 = 1 to 100 { // CHECK: "foo"(%i0, %i1) : (index, index) -> () "foo"(%i1, %j1) : (index,index) -> () } // CHECK: } "boo"() : () -> () // CHECK: "boo"() : () -> () - affine.for %j2 = 1 to 10 { // CHECK: affine.for %i2 = 1 to 10 { - affine.for %k2 = 1 to 10 { // CHECK: affine.for %i3 = 1 to 10 { + for %j2 = 1 to 10 { // CHECK: for %i2 = 1 to 10 { + for %k2 = 1 to 10 { // CHECK: 
for %i3 = 1 to 10 { "goo"() : () -> () // CHECK: "goo"() : () -> () } // CHECK: } } // CHECK: } @@ -253,8 +253,8 @@ func @complex_loops() { // CHECK: func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) { func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) { %c = constant 0 : i32 // CHECK: %c0_i32 = constant 0 : i32 - affine.for %i0 = 1 to %arg0 { // CHECK: affine.for %i0 = 1 to %arg0 { - affine.for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to %arg0 { + for %i0 = 1 to %arg0 { // CHECK: for %i0 = 1 to %arg0 { + for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to %arg0 { store %c, %arg1[%i0, %i1] : memref<?x?xi32> // CHECK: store %c0_i32, %arg1[%i0, %i1] } // CHECK: } } // CHECK: } @@ -263,8 +263,8 @@ func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) { // CHECK: func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { - // CHECK: affine.for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] { - affine.for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] { + // CHECK: for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] { + for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] { // CHECK: "foo"(%arg2, %i0) : (memref<100xf32>, index) -> () "foo"(%arg2, %i0) : (memref<100xf32>, index) -> () } // CHECK: } @@ -275,24 +275,24 @@ func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { func @loop_bounds(%N : index) { // CHECK: %0 = "foo"(%arg0) : (index) -> index %s = "foo"(%N) : (index) -> index - // CHECK: affine.for %i0 = %0 to %arg0 - affine.for %i = %s to %N { - // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to 0 - affine.for %j = (d0)[]->(d0)(%i)[] to 0 step 1 { + // CHECK: for %i0 = %0 to %arg0 + for %i = %s to %N { + // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to 0 + for %j = (d0)[]->(d0)(%i)[] to 0 step 1 { // CHECK: %1 = affine.apply #map{{.*}}(%i0, %i1)[%0] %w1 = affine.apply(d0, d1)[s0] -> (d0+d1) (%i, %j) [%s] // CHECK: %2 = affine.apply #map{{.*}}(%i0, %i1)[%0] %w2 = affine.apply(d0, d1)[s0] -> (s0+1) (%i, %j) [%s] - // CHECK: affine.for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] { - affine.for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] { + // CHECK: for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] { + for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] { // CHECK: "foo"(%i0, %i1, %i2) : (index, index, index) -> () "foo"(%i, %j, %k) : (index, index, index)->() // CHECK: %c30 = constant 30 : index %c = constant 30 : index // CHECK: %3 = affine.apply #map{{.*}}(%arg0, %c30) %u = affine.apply (d0, d1)->(d0+d1) (%N, %c) - // CHECK: affine.for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] { - affine.for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] { + // CHECK: for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] { + for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] { // CHECK: "bar"(%i3) : (index) -> () "bar"(%l) : (index) -> () } // CHECK: } @@ -305,7 +305,7 @@ func @loop_bounds(%N : index) { // CHECK-LABEL: func @ifinst(%arg0: index) { func @ifinst(%N: index) { %c = constant 200 : index // CHECK %c200 = constant 200 - affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 { + for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 { affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%i0)[%arg0, %c200] { %x = constant 1 : i32 // 
CHECK: %c1_i32 = constant 1 : i32 @@ -328,7 +328,7 @@ func @ifinst(%N: index) { // CHECK-LABEL: func @simple_ifinst(%arg0: index) { func @simple_ifinst(%N: index) { %c = constant 200 : index // CHECK %c200 = constant 200 - affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 { + for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 { affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%i0)[%arg0, %c200] { %x = constant 1 : i32 // CHECK: %c1_i32 = constant 1 : i32 @@ -544,18 +544,18 @@ func @funcattrwithblock() -> () #map_non_simple2 = ()[s0, s1] -> (s0 + s1) #map_non_simple3 = ()[s0] -> (s0 + 3) func @funcsimplemap(%arg0: index, %arg1: index) -> () { - affine.for %i0 = 0 to #map_simple0()[] { - // CHECK: affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to #map_simple1()[%arg1] { - // CHECK: affine.for %i1 = 0 to %arg1 { - affine.for %i2 = 0 to #map_non_simple0(%i0)[] { - // CHECK: affine.for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) { - affine.for %i3 = 0 to #map_non_simple1(%i0)[%arg1] { - // CHECK: affine.for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] { - affine.for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] { - // CHECK: affine.for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] { - affine.for %i5 = 0 to #map_non_simple3()[%arg0] { - // CHECK: affine.for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] { + for %i0 = 0 to #map_simple0()[] { + // CHECK: for %i0 = 0 to 10 { + for %i1 = 0 to #map_simple1()[%arg1] { + // CHECK: for %i1 = 0 to %arg1 { + for %i2 = 0 to #map_non_simple0(%i0)[] { + // CHECK: for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) { + for %i3 = 0 to #map_non_simple1(%i0)[%arg1] { + // CHECK: for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] { + for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] { + // CHECK: for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] { + for %i5 = 0 to #map_non_simple3()[%arg0] { + // CHECK: for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] { %c42_i32 = constant 42 : i32 } } @@ -749,9 +749,9 @@ func @sparsevectorattr() -> () { // CHECK-LABEL: func @loops_with_blockids() { func @loops_with_blockids() { ^block0: - affine.for %i = 1 to 100 step 2 { + for %i = 1 to 100 step 2 { ^block1: - affine.for %j = 1 to 200 { + for %j = 1 to 200 { ^block2: } } diff --git a/mlir/test/IR/pretty-locations.mlir b/mlir/test/IR/pretty-locations.mlir index defde9e9c70..bc5a319c99e 100644 --- a/mlir/test/IR/pretty-locations.mlir +++ b/mlir/test/IR/pretty-locations.mlir @@ -18,7 +18,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) { %3 = constant 4 : index loc(callsite("foo" at callsite("mysource1.cc":10:8 at callsite("mysource2.cc":13:8 at "mysource3.cc":100:10)))) // CHECK: } ["foo", mysource.cc:10:8] - affine.for %i0 = 0 to 8 { + for %i0 = 0 to 8 { } loc(fused["foo", "mysource.cc":10:8]) // CHECK: } <"myPass">["foo", "foo2"] diff --git a/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir b/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir index e896e0588d3..b82ac08fe59 100644 --- a/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir +++ b/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir @@ -6,8 +6,8 @@ // CHECK-LABEL: func @materialize_read_1d() { func @materialize_read_1d() { %A = alloc () : memref<7x42xf32> - affine.for %i0 = 0 to 7 step 4 { - affine.for %i1 = 0 to 42 step 4 { + for %i0 = 0 to 7 step 4 { + for %i1 = 0 to 42 step 4 { %f1 = vector_transfer_read %A, %i0, %i1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32> %ip1 = affine.apply (d0) -> (d0 + 1) (%i1) %f2 = vector_transfer_read %A, %i0, %ip1 {permutation_map: 
(d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32> @@ -29,11 +29,11 @@ func @materialize_read_1d() { // CHECK-LABEL: func @materialize_read_1d_partially_specialized func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) { %A = alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32> - affine.for %i0 = 0 to 7 { - affine.for %i1 = 0 to %dyn1 { - affine.for %i2 = 0 to %dyn2 { - affine.for %i3 = 0 to 42 step 2 { - affine.for %i4 = 0 to %dyn4 { + for %i0 = 0 to 7 { + for %i1 = 0 to %dyn1 { + for %i2 = 0 to %dyn2 { + for %i3 = 0 to 42 step 2 { + for %i4 = 0 to %dyn4 { %f1 = vector_transfer_read %A, %i0, %i1, %i2, %i3, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32> %i3p1 = affine.apply (d0) -> (d0 + 1) (%i3) %f2 = vector_transfer_read %A, %i0, %i1, %i2, %i3p1, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32> @@ -54,10 +54,10 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d // CHECK-LABEL: func @materialize_read(%arg0: index, %arg1: index, %arg2: index, %arg3: index) { func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32> - // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 { - // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 { - // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 { - // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 { + // CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 { + // CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 { + // CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 { + // CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 { // CHECK-NEXT: %[[C0:.*]] = constant 0 : index // CHECK-NEXT: %[[C1:.*]] = constant 1 : index // CHECK: {{.*}} = dim %0, 0 : memref<?x?x?x?xf32> @@ -66,9 +66,9 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: {{.*}} = dim %0, 3 : memref<?x?x?x?xf32> // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32> // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast %[[ALLOC]] : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>> - // CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 { - // CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 { - // CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 { + // CHECK-NEXT: for %[[I4:.*]] = 0 to 3 { + // CHECK-NEXT: for %[[I5:.*]] = 0 to 4 { + // CHECK-NEXT: for %[[I6:.*]] = 0 to 5 { // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]] // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]] @@ -109,10 +109,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: return // CHECK-NEXT:} %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0> - affine.for %i0 = 0 to %M step 3 { - affine.for %i1 = 0 to %N { - affine.for %i2 = 0 to %O { - affine.for %i3 = 0 to %P step 5 { + for %i0 = 0 to %M step 3 { + for %i1 = 0 to %N { + for %i2 = 0 to %O { + for %i3 = 0 to %P step 5 { %f = vector_transfer_read %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, 0, d0)} : (memref<?x?x?x?xf32, 0>, index, index, index, index) -> vector<5x4x3xf32> } } @@ -125,10 +125,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32> // CHECK-NEXT: %cst = 
constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32> - // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 { - // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 step 4 { - // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 { - // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 { + // CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 { + // CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 step 4 { + // CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 { + // CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 { // CHECK-NEXT: %[[C0:.*]] = constant 0 : index // CHECK-NEXT: %[[C1:.*]] = constant 1 : index // CHECK: {{.*}} = dim %0, 0 : memref<?x?x?x?xf32> @@ -138,9 +138,9 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32> // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast {{.*}} : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>> // CHECK-NEXT: store %cst, {{.*}}[%[[C0]]] : memref<1xvector<5x4x3xf32>> - // CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 { - // CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 { - // CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 { + // CHECK-NEXT: for %[[I4:.*]] = 0 to 3 { + // CHECK-NEXT: for %[[I5:.*]] = 0 to 4 { + // CHECK-NEXT: for %[[I6:.*]] = 0 to 5 { // CHECK-NEXT: {{.*}} = load {{.*}}[%[[I6]], %[[I5]], %[[I4]]] : memref<5x4x3xf32> // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]]) // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index @@ -184,10 +184,10 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT:} %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0> %f1 = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32> - affine.for %i0 = 0 to %M step 3 { - affine.for %i1 = 0 to %N step 4 { - affine.for %i2 = 0 to %O { - affine.for %i3 = 0 to %P step 5 { + for %i0 = 0 to %M step 3 { + for %i1 = 0 to %N step 4 { + for %i2 = 0 to %O { + for %i3 = 0 to %P step 5 { vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : vector<5x4x3xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index } } diff --git a/mlir/test/Transforms/Vectorize/materialize.mlir b/mlir/test/Transforms/Vectorize/materialize.mlir index ce445ec75bb..80458c75333 100644 --- a/mlir/test/Transforms/Vectorize/materialize.mlir +++ b/mlir/test/Transforms/Vectorize/materialize.mlir @@ -10,10 +10,10 @@ func @materialize(%M : index, %N : index, %O : index, %P : index) { %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0> %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : vector<4x4x4xf32> - // CHECK: affine.for %i0 = 0 to %arg0 step 4 { - // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 4 { - // CHECK-NEXT: affine.for %i2 = 0 to %arg2 { - // CHECK-NEXT: affine.for %i3 = 0 to %arg3 step 4 { + // CHECK: for %i0 = 0 to %arg0 step 4 { + // CHECK-NEXT: for %i1 = 0 to %arg1 step 4 { + // CHECK-NEXT: for %i2 = 0 to %arg2 { + // CHECK-NEXT: for %i3 = 0 to %arg3 step 4 { // CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[ID1]](%i0) // CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[ID1]](%i1) // CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[ID1]](%i2) @@ -25,10 +25,10 @@ func @materialize(%M : index, %N : index, %O : index, %P : index) { // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index // CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0P3]](%i1) // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : 
vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index - affine.for %i0 = 0 to %M step 4 { - affine.for %i1 = 0 to %N step 4 { - affine.for %i2 = 0 to %O { - affine.for %i3 = 0 to %P step 4 { + for %i0 = 0 to %M step 4 { + for %i1 = 0 to %N step 4 { + for %i2 = 0 to %O { + for %i3 = 0 to %P step 4 { "vector_transfer_write"(%f1, %A, %i0, %i1, %i2, %i3) {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : (vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index) -> () } } diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir index 71c442b965e..b5f771d7e62 100644 --- a/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir +++ b/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir @@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 // 4x unroll (jammed by construction). - // CHECK: affine.for %i0 = 0 to %arg0 { - // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 { + // CHECK: for %i0 = 0 to %arg0 { + // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 { // CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> // CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> // CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> @@ -34,15 +34,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}} // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> // - affine.for %i0 = 0 to %M { - affine.for %i1 = 0 to %N { + for %i0 = 0 to %M { + for %i1 = 0 to %N { // non-scoped %f1 store %f1, %A[%i0, %i1] : memref<?x?xf32, 0> } } // 4x unroll (jammed by construction). - // CHECK: affine.for %i2 = 0 to %arg0 { - // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 { + // CHECK: for %i2 = 0 to %arg0 { + // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 { // CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> // CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> // CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32> @@ -60,15 +60,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}} // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> // - affine.for %i2 = 0 to %M { - affine.for %i3 = 0 to %N { + for %i2 = 0 to %M { + for %i3 = 0 to %N { // non-scoped %f2 store %f2, %B[%i2, %i3] : memref<?x?xf32, 0> } } // 4x unroll (jammed by construction). 
- // CHECK: affine.for %i4 = 0 to %arg0 { - // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 { + // CHECK: for %i4 = 0 to %arg0 { + // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = vector_transfer_read @@ -110,8 +110,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: vector_transfer_write // - affine.for %i4 = 0 to %M { - affine.for %i5 = 0 to %N { + for %i4 = 0 to %M { + for %i5 = 0 to %N { %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0> %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0> %s5 = addf %a5, %b5 : f32 diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir index 62149c323b6..92df49fa8fa 100644 --- a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir +++ b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir @@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 // (3x2)x unroll (jammed by construction). - // CHECK: affine.for %i0 = 0 to %arg0 step 3 { - // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 16 { + // CHECK: for %i0 = 0 to %arg0 step 3 { + // CHECK-NEXT: for %i1 = 0 to %arg1 step 16 { // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32> @@ -41,26 +41,26 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: [[VAL50:%.*]] = affine.apply [[D0P2]](%i0) // CHECK-NEXT: [[VAL51:%.*]] = affine.apply [[D0P8]](%i1) // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> - affine.for %i0 = 0 to %M { - affine.for %i1 = 0 to %N { + for %i0 = 0 to %M { + for %i1 = 0 to %N { // non-scoped %f1 store %f1, %A[%i0, %i1] : memref<?x?xf32, 0> } } // (3x2)x unroll (jammed by construction). - // CHECK: affine.for %i2 = 0 to %arg0 step 3 { - // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 16 { + // CHECK: for %i2 = 0 to %arg0 step 3 { + // CHECK-NEXT: for %i3 = 0 to %arg1 step 16 { // ..... - affine.for %i2 = 0 to %M { - affine.for %i3 = 0 to %N { + for %i2 = 0 to %M { + for %i3 = 0 to %N { // non-scoped %f2 // CHECK does (3x4)x unrolling. store %f2, %B[%i2, %i3] : memref<?x?xf32, 0> } } // (3x2)x unroll (jammed by construction). 
- // CHECK: affine.for %i4 = 0 to %arg0 step 3 { - // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 16 { + // CHECK: for %i4 = 0 to %arg0 step 3 { + // CHECK-NEXT: for %i5 = 0 to %arg1 step 16 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = vector_transfer_read @@ -122,8 +122,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: vector_transfer_write // - affine.for %i4 = 0 to %M { - affine.for %i5 = 0 to %N { + for %i4 = 0 to %M { + for %i5 = 0 to %N { %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0> %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0> %s5 = addf %a5, %b5 : f32 diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir index 59705eca69e..36ec96e30b4 100644 --- a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir +++ b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir @@ -13,8 +13,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 // 2x unroll (jammed by construction). - // CHECK: affine.for %i0 = 0 to %arg0 step 3 { - // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 { + // CHECK: for %i0 = 0 to %arg0 step 3 { + // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 { // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32> // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32> // CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i0) @@ -24,15 +24,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i1) // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32> // - affine.for %i0 = 0 to %M { - affine.for %i1 = 0 to %N { + for %i0 = 0 to %M { + for %i1 = 0 to %N { // non-scoped %f1 store %f1, %A[%i0, %i1] : memref<?x?xf32, 0> } } // 2x unroll (jammed by construction). - // CHECK: affine.for %i2 = 0 to %arg0 step 3 { - // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 { + // CHECK: for %i2 = 0 to %arg0 step 3 { + // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 { // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32> // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32> // CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i2) @@ -42,15 +42,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i3) // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32> // - affine.for %i2 = 0 to %M { - affine.for %i3 = 0 to %N { + for %i2 = 0 to %M { + for %i3 = 0 to %N { // non-scoped %f2 store %f2, %B[%i2, %i3] : memref<?x?xf32, 0> } } // 2x unroll (jammed by construction). 
- // CHECK: affine.for %i4 = 0 to %arg0 step 3 { - // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 { + // CHECK: for %i4 = 0 to %arg0 step 3 { + // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = vector_transfer_read @@ -72,8 +72,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: vector_transfer_write // - affine.for %i4 = 0 to %M { - affine.for %i5 = 0 to %N { + for %i4 = 0 to %M { + for %i5 = 0 to %N { %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0> %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0> %s5 = addf %a5, %b5 : f32 diff --git a/mlir/test/Transforms/Vectorize/normalize_maps.mlir b/mlir/test/Transforms/Vectorize/normalize_maps.mlir index 076d2c75633..9569dbe07fe 100644 --- a/mlir/test/Transforms/Vectorize/normalize_maps.mlir +++ b/mlir/test/Transforms/Vectorize/normalize_maps.mlir @@ -9,19 +9,19 @@ // CHECK-LABEL: func @simple() func @simple() { - affine.for %i0 = 0 to 7 { + for %i0 = 0 to 7 { %0 = affine.apply (d0) -> (d0) (%i0) %1 = affine.apply (d0) -> (d0) (%0) %2 = affine.apply (d0, d1) -> (d0 + d1) (%0, %0) %3 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0) } - // CHECK-NEXT: affine.for %i0 = 0 to 7 + // CHECK-NEXT: for %i0 = 0 to 7 // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i0) // CHECK-NEXT: {{.*}} affine.apply #[[D0TIMES2]](%i0) // CHECK-NEXT: {{.*}} affine.apply #[[ZERO]]() - affine.for %i1 = 0 to 7 { - affine.for %i2 = 0 to 42 { + for %i1 = 0 to 7 { + for %i2 = 0 to 42 { %20 = affine.apply (d0, d1) -> (d1) (%i1, %i2) %21 = affine.apply (d0, d1) -> (d0) (%i1, %i2) %22 = affine.apply (d0, d1) -> (d0 + d1) (%20, %21) @@ -29,15 +29,15 @@ func @simple() { %24 = affine.apply (d0, d1) -> (-d0 + d1) (%20, %21) } } - // CHECK: affine.for %i1 = 0 to 7 - // CHECK-NEXT: affine.for %i2 = 0 to 42 + // CHECK: for %i1 = 0 to 7 + // CHECK-NEXT: for %i2 = 0 to 42 // CHECK-NEXT: {{.*}} affine.apply #[[D0PLUSD1]](%i1, %i2) // CHECK-NEXT: {{.*}} affine.apply #[[MINSD0PLUSD1]](%i1, %i2) // CHECK-NEXT: {{.*}} affine.apply #[[D0MINUSD1]](%i1, %i2) - affine.for %i3 = 0 to 16 { - affine.for %i4 = 0 to 47 step 2 { - affine.for %i5 = 0 to 78 step 16 { + for %i3 = 0 to 16 { + for %i4 = 0 to 47 step 2 { + for %i5 = 0 to 78 step 16 { %50 = affine.apply (d0) -> (d0) (%i3) %51 = affine.apply (d0) -> (d0) (%i4) %52 = affine.apply (d0) -> (d0) (%i5) @@ -47,9 +47,9 @@ func @simple() { } } } - // CHECK: affine.for %i3 = 0 to 16 - // CHECK-NEXT: affine.for %i4 = 0 to 47 step 2 - // CHECK-NEXT: affine.for %i5 = 0 to 78 step 16 + // CHECK: for %i3 = 0 to 16 + // CHECK-NEXT: for %i4 = 0 to 47 step 2 + // CHECK-NEXT: for %i5 = 0 to 78 step 16 // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i3) // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i4) // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i5) diff --git a/mlir/test/Transforms/Vectorize/vectorize_1d.mlir b/mlir/test/Transforms/Vectorize/vectorize_1d.mlir index c812db2d498..05e31dbdea5 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_1d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_1d.mlir @@ -23,17 +23,17 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) { // // CHECK: for {{.*}} step 128 // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : (memref<?x?xf32>, index, index) -> vector<128xf32> - affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector + for %i0 = 0 to %M { // vectorized due to scalar -> vector %a0 = load %A[%cst0, %cst0] : 
memref<?x?xf32> } // // CHECK:for {{.*}} [[ARG_M]] { - affine.for %i1 = 0 to %M { // not vectorized + for %i1 = 0 to %M { // not vectorized %a1 = load %A[%i1, %i1] : memref<?x?xf32> } // -// CHECK: affine.for %i{{[0-9]*}} = 0 to [[ARG_M]] { - affine.for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1 +// CHECK: for %i{{[0-9]*}} = 0 to [[ARG_M]] { + for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1 %r2 = affine.apply (d0) -> (d0) (%i2) %a2 = load %A[%r2#0, %cst0] : memref<?x?xf32> } @@ -41,7 +41,7 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) { // CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 // CHECK-NEXT: [[APP3:%[a-zA-Z0-9]+]] = affine.apply {{.*}}[[IV3]] // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[APP3]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32> - affine.for %i3 = 0 to %M { // vectorized + for %i3 = 0 to %M { // vectorized %r3 = affine.apply (d0) -> (d0) (%i3) %a3 = load %A[%cst0, %r3#0] : memref<?x?xf32> } @@ -51,8 +51,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) { // CHECK-NEXT: [[APP50:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]]) // CHECK-NEXT: [[APP51:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]]) // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP50]], [[APP51]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32> - affine.for %i4 = 0 to %M { // vectorized - affine.for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1 + for %i4 = 0 to %M { // vectorized + for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1 %r50 = affine.apply (d0, d1) -> (d1) (%i4, %i5) %r51 = affine.apply (d0, d1) -> (d0) (%i4, %i5) %a5 = load %A[%r50, %r51] : memref<?x?xf32> @@ -61,8 +61,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) { // // CHECK: for [[IV6:%[i0-9]*]] = 0 to [[ARG_M]] { // CHECK-NEXT: for [[IV7:%[i0-9]*]] = 0 to [[ARG_N]] { - affine.for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1 - affine.for %i7 = 0 to %N { // not vectorized, can never vectorize + for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1 + for %i7 = 0 to %N { // not vectorized, can never vectorize %r70 = affine.apply (d0, d1) -> (d1 + d0) (%i6, %i7) %r71 = affine.apply (d0, d1) -> (d0) (%i6, %i7) %a7 = load %A[%r70, %r71] : memref<?x?xf32> @@ -74,8 +74,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) { // CHECK-NEXT: [[APP9_0:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]]) // CHECK-NEXT: [[APP9_1:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]]) // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP9_0]], [[APP9_1]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32> - affine.for %i8 = 0 to %M { // vectorized - affine.for %i9 = 0 to %N { + for %i8 = 0 to %M { // vectorized + for %i9 = 0 to %N { %r90 = affine.apply (d0, d1) -> (d1) (%i8, %i9) %r91 = affine.apply (d0, d1) -> (d0 + d1) (%i8, %i9) %a9 = load %A[%r90, %r91] : memref<?x?xf32> @@ -84,8 +84,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) { // // CHECK: for [[IV10:%[i0-9]*]] = 0 to %{{[0-9]*}} { // CHECK: for [[IV11:%[i0-9]*]] = 0 to %{{[0-9]*}} { - affine.for %i10 = 0 to %M { // not vectorized, need per load transposes - affine.for %i11 = 0 to %N { // not vectorized, need per load transposes + for %i10 = 0 to %M { // not vectorized, need per load transposes + for %i11 = 0 to %N { // not 
vectorized, need per load transposes %r11_0 = affine.apply (d0, d1) -> (d0) (%i10, %i11) %r11_1 = affine.apply (d0, d1) -> (d1) (%i10, %i11) %a11 = load %A[%r11_0, %r11_1] : memref<?x?xf32> @@ -98,9 +98,9 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) { // CHECK: for [[IV12:%[i0-9]*]] = 0 to %{{[0-9]*}} { // CHECK: for [[IV13:%[i0-9]*]] = 0 to %{{[0-9]*}} { // CHECK: for [[IV14:%[i0-9]+]] = 0 to [[ARG_P]] step 128 - affine.for %i12 = 0 to %M { // not vectorized, can never vectorize - affine.for %i13 = 0 to %N { // not vectorized, can never vectorize - affine.for %i14 = 0 to %P { // vectorized + for %i12 = 0 to %M { // not vectorized, can never vectorize + for %i13 = 0 to %N { // not vectorized, can never vectorize + for %i14 = 0 to %P { // vectorized %r14_0 = affine.apply (d0, d1, d2) -> (d1) (%i12, %i13, %i14) %r14_1 = affine.apply (d0, d1, d2) -> (d0 + d1) (%i12, %i13, %i14) %r14_2 = affine.apply (d0, d1, d2) -> (d0 + d2) (%i12, %i13, %i14) @@ -109,24 +109,24 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) { } } // -// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { - affine.for %i15 = 0 to %M { // not vectorized due to condition below +// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { + for %i15 = 0 to %M { // not vectorized due to condition below affine.if #set0(%i15) { %a15 = load %A[%cst0, %cst0] : memref<?x?xf32> } } // -// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { - affine.for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load +// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { + for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load %a16 = alloc(%M) : memref<?xvector<2xf32>> %l16 = load %a16[%i16] : memref<?xvector<2xf32>> } // -// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { +// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 // CHECK: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : {{.*}} -> vector<128xf32> - affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17 - affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector + for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17 + for %i18 = 0 to %M { // vectorized due to scalar -> vector %a18 = load %A[%cst0, %cst0] : memref<?x?xf32> } } @@ -139,24 +139,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %C = alloc (%M, %N) : memref<?x?xf32, 0> %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 - affine.for %i0 = 0 to %M { - affine.for %i1 = 0 to %N { + for %i0 = 0 to %M { + for %i1 = 0 to %N { // CHECK: [[C1:%.*]] = constant splat<vector<128xf32>, 1.000000e+00> : vector<128xf32> // CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index // non-scoped %f1 store %f1, %A[%i0, %i1] : memref<?x?xf32, 0> } } - affine.for %i2 = 0 to %M { - affine.for %i3 = 0 to %N { + for %i2 = 0 to %M { + for %i3 = 0 to %N { // CHECK: [[C3:%.*]] = constant splat<vector<128xf32>, 2.000000e+00> : vector<128xf32> // CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index // non-scoped %f2 store %f2, %B[%i2, %i3] : memref<?x?xf32, 0> } } - affine.for %i4 = 0 to %M { - affine.for %i5 = 0 to %N { + for %i4 = 0 to %M { + for %i5 = 0 to %N { // CHECK: [[A5:%.*]] = vector_transfer_read 
%0, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32> // CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32> // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<128xf32> @@ -188,10 +188,10 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-LABEL: @vec_rejected func @vec_rejected(%A : memref<?x?xf32>, %C : memref<?x?xf32>) { %N = dim %A, 0 : memref<?x?xf32> - affine.for %i = 0 to %N { + for %i = 0 to %N { // CHECK-NOT: vector %a = load %A[%i, %i] : memref<?x?xf32> // not vectorized - affine.for %j = 0 to %N { + for %j = 0 to %N { %b = load %A[%i, %j] : memref<?x?xf32> // may be vectorized // CHECK-NOT: vector %c = addf %a, %b : f32 // not vectorized because %a wasn't diff --git a/mlir/test/Transforms/Vectorize/vectorize_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_2d.mlir index 59c7483749b..d847f6bb5ce 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_2d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_2d.mlir @@ -11,13 +11,13 @@ func @vec2d(%A : memref<?x?x?xf32>) { // CHECK: for {{.*}} = 0 to %1 step 32 // CHECK: for {{.*}} = 0 to %2 step 256 // Example: - // affine.for %i0 = 0 to %0 { - // affine.for %i1 = 0 to %1 step 32 { - // affine.for %i2 = 0 to %2 step 256 { + // for %i0 = 0 to %0 { + // for %i1 = 0 to %1 step 32 { + // for %i2 = 0 to %2 step 256 { // %3 = "vector_transfer_read"(%arg0, %i0, %i1, %i2) : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32> - affine.for %i0 = 0 to %M { - affine.for %i1 = 0 to %N { - affine.for %i2 = 0 to %P { + for %i0 = 0 to %M { + for %i1 = 0 to %N { + for %i2 = 0 to %P { %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32> } } @@ -27,9 +27,9 @@ func @vec2d(%A : memref<?x?x?xf32>) { // CHECK: for {{.*}} = 0 to %2 { // For the case: --test-fastest-varying=1 --test-fastest-varying=0 no // vectorization happens because of loop nesting order . 
- affine.for %i3 = 0 to %M { - affine.for %i4 = 0 to %N { - affine.for %i5 = 0 to %P { + for %i3 = 0 to %M { + for %i4 = 0 to %N { + for %i5 = 0 to %P { %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32> } } @@ -43,24 +43,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %C = alloc (%M, %N) : memref<?x?xf32, 0> %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 - affine.for %i0 = 0 to %M { - affine.for %i1 = 0 to %N { + for %i0 = 0 to %M { + for %i1 = 0 to %N { // CHECK: [[C1:%.*]] = constant splat<vector<32x256xf32>, 1.000000e+00> : vector<32x256xf32> // CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index // non-scoped %f1 store %f1, %A[%i0, %i1] : memref<?x?xf32, 0> } } - affine.for %i2 = 0 to %M { - affine.for %i3 = 0 to %N { + for %i2 = 0 to %M { + for %i3 = 0 to %N { // CHECK: [[C3:%.*]] = constant splat<vector<32x256xf32>, 2.000000e+00> : vector<32x256xf32> // CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index // non-scoped %f2 store %f2, %B[%i2, %i3] : memref<?x?xf32, 0> } } - affine.for %i4 = 0 to %M { - affine.for %i5 = 0 to %N { + for %i4 = 0 to %M { + for %i5 = 0 to %N { // CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32> // CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32> // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32> diff --git a/mlir/test/Transforms/Vectorize/vectorize_3d.mlir b/mlir/test/Transforms/Vectorize/vectorize_3d.mlir index 08ca27dbeee..1a6bee585ee 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_3d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_3d.mlir @@ -7,17 +7,17 @@ func @vec3d(%A : memref<?x?x?xf32>) { %0 = dim %A, 0 : memref<?x?x?xf32> %1 = dim %A, 1 : memref<?x?x?xf32> %2 = dim %A, 2 : memref<?x?x?xf32> - // CHECK: affine.for %i0 = 0 to %0 { - // CHECK: affine.for %i1 = 0 to %0 { - // CHECK: affine.for %i2 = 0 to %0 step 32 { - // CHECK: affine.for %i3 = 0 to %1 step 64 { - // CHECK: affine.for %i4 = 0 to %2 step 256 { + // CHECK: for %i0 = 0 to %0 { + // CHECK: for %i1 = 0 to %0 { + // CHECK: for %i2 = 0 to %0 step 32 { + // CHECK: for %i3 = 0 to %1 step 64 { + // CHECK: for %i4 = 0 to %2 step 256 { // CHECK: %3 = vector_transfer_read %arg0, %i2, %i3, %i4 {permutation_map: #[[map_proj_d0d1d2_d0d1d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x64x256xf32> - affine.for %t0 = 0 to %0 { - affine.for %t1 = 0 to %0 { - affine.for %i0 = 0 to %0 { - affine.for %i1 = 0 to %1 { - affine.for %i2 = 0 to %2 { + for %t0 = 0 to %0 { + for %t1 = 0 to %0 { + for %i0 = 0 to %0 { + for %i1 = 0 to %1 { + for %i2 = 0 to %2 { %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32> } } diff --git a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir index d00b99f1716..4654ab810df 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir @@ -7,13 +7,13 @@ func @vec2d(%A : memref<?x?x?xf32>) { %M = dim %A, 0 : memref<?x?x?xf32> %N = dim %A, 1 : memref<?x?x?xf32> %P = dim %A, 2 : memref<?x?x?xf32> - // CHECK: affine.for %i0 = 0 to %0 step 32 - // CHECK: affine.for %i1 = 0 to %1 { - // CHECK: affine.for %i2 = 0 to %2 step 256 
+ // CHECK: for %i0 = 0 to %0 step 32 + // CHECK: for %i1 = 0 to %1 { + // CHECK: for %i2 = 0 to %2 step 256 // CHECK: {{.*}} = vector_transfer_read %arg0, %i0, %i1, %i2 {permutation_map: #[[map_proj_d0d1d2_d0d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32> - affine.for %i0 = 0 to %M { - affine.for %i1 = 0 to %N { - affine.for %i2 = 0 to %P { + for %i0 = 0 to %M { + for %i1 = 0 to %N { + for %i2 = 0 to %P { %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32> } } @@ -23,9 +23,9 @@ func @vec2d(%A : memref<?x?x?xf32>) { // CHECK: for {{.*}} = 0 to %2 { // For the case: --test-fastest-varying=2 --test-fastest-varying=0 no // vectorization happens because of loop nesting order - affine.for %i3 = 0 to %M { - affine.for %i4 = 0 to %N { - affine.for %i5 = 0 to %P { + for %i3 = 0 to %M { + for %i4 = 0 to %N { + for %i5 = 0 to %P { %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32> } } diff --git a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir index a8a8d5d7790..0eebf816535 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir @@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) { // CHECK: for {{.*}} = 0 to %2 { // For the case: --test-fastest-varying=0 --test-fastest-varying=2 no // vectorization happens because of loop nesting order. - affine.for %i0 = 0 to %M { - affine.for %i1 = 0 to %N { - affine.for %i2 = 0 to %P { + for %i0 = 0 to %M { + for %i1 = 0 to %N { + for %i2 = 0 to %P { %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32> } } } - // CHECK: affine.for %i3 = 0 to %0 step 32 - // CHECK: affine.for %i4 = 0 to %1 step 256 - // CHECK: affine.for %i5 = 0 to %2 { + // CHECK: for %i3 = 0 to %0 step 32 + // CHECK: for %i4 = 0 to %1 step 256 + // CHECK: for %i5 = 0 to %2 { // CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32> - affine.for %i3 = 0 to %M { - affine.for %i4 = 0 to %N { - affine.for %i5 = 0 to %P { + for %i3 = 0 to %M { + for %i4 = 0 to %N { + for %i5 = 0 to %P { %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32> } } @@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) { %0 = dim %A, 0 : memref<?x?x?xf32> %1 = dim %A, 1 : memref<?x?x?xf32> %2 = dim %A, 2 : memref<?x?x?xf32> - // CHECK: affine.for %i0 = 0 to %0 step 32 { - // CHECK: affine.for %i1 = 0 to %1 { - // CHECK: affine.for %i2 = 0 to %2 step 256 { + // CHECK: for %i0 = 0 to %0 step 32 { + // CHECK: for %i1 = 0 to %1 { + // CHECK: for %i2 = 0 to %2 step 256 { // CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32> - // CHECK: affine.for %i3 = 0 to %1 step 256 { - // CHECK: affine.for %i4 = 0 to %2 { + // CHECK: for %i3 = 0 to %1 step 256 { + // CHECK: for %i4 = 0 to %2 { // CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32> - // CHECK: affine.for %i5 = 0 to %2 { + // CHECK: for %i5 = 0 to %2 { // CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32> - affine.for %i0 = 0 to %0 { - affine.for %i1 = 0 to %1 { - affine.for %i2 = 0 to %2 { + for %i0 = 0 
to %0 { + for %i1 = 0 to %1 { + for %i2 = 0 to %2 { %a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32> } } - affine.for %i3 = 0 to %1 { - affine.for %i4 = 0 to %2 { + for %i3 = 0 to %1 { + for %i4 = 0 to %2 { %a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32> } - affine.for %i5 = 0 to %2 { + for %i5 = 0 to %2 { %a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32> } } diff --git a/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir index b8e4e075890..1ba563b3442 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir @@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) { // CHECK: for {{.*}} = 0 to %2 { // For the case: --test-fastest-varying=0 --test-fastest-varying=1 no // vectorization happens because of loop nesting order. - affine.for %i0 = 0 to %M { - affine.for %i1 = 0 to %N { - affine.for %i2 = 0 to %P { + for %i0 = 0 to %M { + for %i1 = 0 to %N { + for %i2 = 0 to %P { %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32> } } } - // CHECK: affine.for %i3 = 0 to %0 step 32 - // CHECK: affine.for %i4 = 0 to %1 { - // CHECK: affine.for %i5 = 0 to %2 step 256 + // CHECK: for %i3 = 0 to %0 step 32 + // CHECK: for %i4 = 0 to %1 { + // CHECK: for %i5 = 0 to %2 step 256 // CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32> - affine.for %i3 = 0 to %M { - affine.for %i4 = 0 to %N { - affine.for %i5 = 0 to %P { + for %i3 = 0 to %M { + for %i4 = 0 to %N { + for %i5 = 0 to %P { %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32> } } @@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) { %0 = dim %A, 0 : memref<?x?x?xf32> %1 = dim %A, 1 : memref<?x?x?xf32> %2 = dim %A, 2 : memref<?x?x?xf32> - // CHECK: affine.for %i0 = 0 to %0 step 32 { - // CHECK: affine.for %i1 = 0 to %1 step 256 { - // CHECK: affine.for %i2 = 0 to %2 { + // CHECK: for %i0 = 0 to %0 step 32 { + // CHECK: for %i1 = 0 to %1 step 256 { + // CHECK: for %i2 = 0 to %2 { // CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32> - // CHECK: affine.for %i3 = 0 to %1 { - // CHECK: affine.for %i4 = 0 to %2 step 256 { + // CHECK: for %i3 = 0 to %1 { + // CHECK: for %i4 = 0 to %2 step 256 { // CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32> - // CHECK: affine.for %i5 = 0 to %2 step 256 { + // CHECK: for %i5 = 0 to %2 step 256 { // CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32> - affine.for %i0 = 0 to %0 { - affine.for %i1 = 0 to %1 { - affine.for %i2 = 0 to %2 { + for %i0 = 0 to %0 { + for %i1 = 0 to %1 { + for %i2 = 0 to %2 { %a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32> } } - affine.for %i3 = 0 to %1 { - affine.for %i4 = 0 to %2 { + for %i3 = 0 to %1 { + for %i4 = 0 to %2 { %a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32> } - affine.for %i5 = 0 to %2 { + for %i5 = 0 to %2 { %a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32> } } diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index cc295751748..29accf4ffc1 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ 
b/mlir/test/Transforms/canonicalize.mlir @@ -213,10 +213,10 @@ func @dyn_shape_fold(%L : index, %M : index) -> (memref<? x ? x i32>, memref<? x // CHECK-NEXT: %2 = alloc() : memref<512x1024xi32> %c = alloc(%K, %N) : memref<? x ? x i32> - // CHECK: affine.for %i0 = - affine.for %i = 0 to %L { - // CHECK-NEXT: affine.for %i1 = - affine.for %j = 0 to 10 { + // CHECK: for %i0 = + for %i = 0 to %L { + // CHECK-NEXT: for %i1 = + for %j = 0 to 10 { // CHECK-NEXT: %4 = load %0[%i0, %i1] : memref<?x1024xf32> // CHECK-NEXT: store %4, %1[%c0, %c0, %i0, %i1, %c0] : memref<4x1024x8x512x?xf32> %v = load %a[%i, %j] : memref<?x?xf32> @@ -242,8 +242,8 @@ func @merge_constants() -> (index, index) { // CHECK-LABEL: func @hoist_constant func @hoist_constant(%arg0: memref<8xi32>) { // CHECK-NEXT: %c42_i32 = constant 42 : i32 - // CHECK-NEXT: affine.for %i0 = 0 to 8 { - affine.for %i0 = 0 to 8 { + // CHECK-NEXT: for %i0 = 0 to 8 { + for %i0 = 0 to 8 { // CHECK-NEXT: store %c42_i32, %arg0[%i0] %c42_i32 = constant 42 : i32 store %c42_i32, %arg0[%i0] : memref<8xi32> diff --git a/mlir/test/Transforms/constant-fold.mlir b/mlir/test/Transforms/constant-fold.mlir index 1c23914d7a2..6043e478c5a 100644 --- a/mlir/test/Transforms/constant-fold.mlir +++ b/mlir/test/Transforms/constant-fold.mlir @@ -2,8 +2,8 @@ // CHECK-LABEL: @test(%arg0: memref<f32>) { func @test(%p : memref<f32>) { - affine.for %i0 = 0 to 128 { - affine.for %i1 = 0 to 8 { // CHECK: affine.for %i1 = 0 to 8 { + for %i0 = 0 to 128 { + for %i1 = 0 to 8 { // CHECK: for %i1 = 0 to 8 { %0 = constant 4.5 : f32 %1 = constant 1.5 : f32 diff --git a/mlir/test/Transforms/cse.mlir b/mlir/test/Transforms/cse.mlir index 31a7e13b73e..c4c0da7053e 100644 --- a/mlir/test/Transforms/cse.mlir +++ b/mlir/test/Transforms/cse.mlir @@ -123,8 +123,8 @@ func @down_propagate_for_ml() { // CHECK: %c1_i32 = constant 1 : i32 %0 = constant 1 : i32 - // CHECK-NEXT: affine.for %i0 = 0 to 4 { - affine.for %i = 0 to 4 { + // CHECK-NEXT: for %i0 = 0 to 4 { + for %i = 0 to 4 { // CHECK-NEXT: "foo"(%c1_i32, %c1_i32) : (i32, i32) -> () %1 = constant 1 : i32 "foo"(%0, %1) : (i32, i32) -> () @@ -155,8 +155,8 @@ func @down_propagate_cfg() -> i32 { /// Check that operation definitions are NOT propagated up the dominance tree. // CHECK-LABEL: @up_propagate_ml func @up_propagate_ml() -> i32 { - // CHECK: affine.for %i0 = 0 to 4 { - affine.for %i = 0 to 4 { + // CHECK: for %i0 = 0 to 4 { + for %i = 0 to 4 { // CHECK-NEXT: %c1_i32 = constant 1 : i32 // CHECK-NEXT: "foo"(%c1_i32) : (i32) -> () %0 = constant 1 : i32 diff --git a/mlir/test/Transforms/dma-generate.mlir b/mlir/test/Transforms/dma-generate.mlir index 864a61d3abd..a954bdb96a1 100644 --- a/mlir/test/Transforms/dma-generate.mlir +++ b/mlir/test/Transforms/dma-generate.mlir @@ -32,7 +32,7 @@ func @loop_nest_1d() { // Second DMA transfer. 
// CHECK: dma_start %1[%c256], %5[%c0], %c256_0, %6[%c0] : memref<512xf32>, memref<256xf32, 1>, memref<1xi32> // CHECK-NEXT: dma_wait %6[%c0], %c256_0 : memref<1xi32> - // CHECK: affine.for %i0 = 0 to 256 { + // CHECK: for %i0 = 0 to 256 { // CHECK-NEXT: %7 = load %3[%i0] : memref<256xf32, 1> // CHECK: %8 = affine.apply [[MAP_PLUS_256]](%i0) // CHECK: %9 = affine.apply [[MAP_MINUS_256]](%8) @@ -41,7 +41,7 @@ func @loop_nest_1d() { // CHECK: %11 = load %2[%i0] : memref<256xf32, 1> // CHECK-NEXT: } // CHECK-NEXT: return - affine.for %i = 0 to 256 { + for %i = 0 to 256 { load %A[%i] : memref<256 x f32> %idx = affine.apply (d0) -> (d0 + 256)(%i) load %B[%idx] : memref<512 x f32> @@ -68,20 +68,20 @@ func @loop_nest_1d() { // INCOMING DMA for C. // CHECK-DAG: dma_start %arg2[%c0, %c0], [[BUFC]][%c0, %c0], %c16384_0, [[TAGC]][%c0] : memref<512x32xf32>, memref<512x32xf32, 1>, memref<1xi32> // CHECK-DAG: dma_wait [[TAGC]][%c0], %c16384_0 : memref<1xi32> -// CHECK-NEXT: affine.for %i0 = 0 to 32 { -// CHECK-NEXT: affine.for %i1 = 0 to 32 { -// CHECK-NEXT: affine.for %i2 = 0 to 32 { -// CHECK-NEXT: affine.for %i3 = 0 to 16 { +// CHECK-NEXT: for %i0 = 0 to 32 { +// CHECK-NEXT: for %i1 = 0 to 32 { +// CHECK-NEXT: for %i2 = 0 to 32 { +// CHECK-NEXT: for %i3 = 0 to 16 { // CHECK-NEXT: %7 = affine.apply #map{{[0-9]+}}(%i1, %i3) // CHECK-NEXT: %8 = load [[BUFB]][%7, %i0] : memref<512x32xf32, 1> // CHECK-NEXT: "foo"(%8) : (f32) -> () // CHECK-NEXT: } -// CHECK-NEXT: affine.for %i4 = 0 to 16 { +// CHECK-NEXT: for %i4 = 0 to 16 { // CHECK-NEXT: %9 = affine.apply #map{{[0-9]+}}(%i2, %i4) // CHECK-NEXT: %10 = load [[BUFA]][%9, %i1] : memref<512x32xf32, 1> // CHECK-NEXT: "bar"(%10) : (f32) -> () // CHECK-NEXT: } -// CHECK-NEXT: affine.for %i5 = 0 to 16 { +// CHECK-NEXT: for %i5 = 0 to 16 { // CHECK-NEXT: %11 = "abc_compute"() : () -> f32 // CHECK-NEXT: %12 = affine.apply #map{{[0-9]+}}(%i2, %i5) // CHECK-NEXT: %13 = load [[BUFC]][%12, %i0] : memref<512x32xf32, 1> @@ -102,20 +102,20 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>, // DMAs will be performed at this level (jT is the first loop without a stride). // A and B are read, while C is both read and written. A total of three new buffers // are allocated and existing load's/store's are replaced by accesses to those buffers. - affine.for %jT = 0 to 32 { - affine.for %kT = 0 to 32 { - affine.for %iT = 0 to 32 { - affine.for %kk = 0 to 16 { // k intratile + for %jT = 0 to 32 { + for %kT = 0 to 32 { + for %iT = 0 to 32 { + for %kk = 0 to 16 { // k intratile %k = affine.apply (d0, d1) -> (16*d0 + d1) (%kT, %kk) %v0 = load %B[%k, %jT] : memref<512 x 32 x f32> "foo"(%v0) : (f32) -> () } - affine.for %ii = 0 to 16 { // i intratile. + for %ii = 0 to 16 { // i intratile. %i = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii) %v1 = load %A[%i, %kT] : memref<512 x 32 x f32> "bar"(%v1) : (f32) -> () } - affine.for %ii_ = 0 to 16 { // i intratile. + for %ii_ = 0 to 16 { // i intratile. 
%v2 = "abc_compute"() : () -> f32 %i_ = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii_) %v3 = load %C[%i_, %jT] : memref<512 x 32 x f32> @@ -134,13 +134,13 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>, // // CHECK-LABEL: func @loop_nest_modulo() { // CHECK: %0 = alloc() : memref<256x8xf32> -// CHECK-NEXT: affine.for %i0 = 0 to 32 step 4 { +// CHECK-NEXT: for %i0 = 0 to 32 step 4 { // CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0) // CHECK-NEXT: %2 = alloc() : memref<1x2xf32, 1> // CHECK-NEXT: %3 = alloc() : memref<1xi32> // CHECK-NEXT: dma_start %0[%1, %c0], %2[%c0, %c0], %c2, %3[%c0] : memref<256x8xf32>, memref<1x2xf32, 1>, memref<1xi32> // CHECK-NEXT: dma_wait %3[%c0], %c2 : memref<1xi32> -// CHECK-NEXT: affine.for %i1 = 0 to 8 { +// CHECK-NEXT: for %i1 = 0 to 8 { // ... // ... // CHECK: } @@ -148,9 +148,9 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>, // CHECK-NEXT: return func @loop_nest_modulo() { %A = alloc() : memref<256 x 8 x f32> - affine.for %i = 0 to 32 step 4 { + for %i = 0 to 32 step 4 { // DMAs will be performed at this level (%j is the first unit stride loop) - affine.for %j = 0 to 8 { + for %j = 0 to 8 { %idx = affine.apply (d0) -> (d0 mod 2) (%j) // A buffer of size 32 x 2 will be allocated (original buffer was 256 x 8). %v = load %A[%i, %idx] : memref<256 x 8 x f32> @@ -164,17 +164,17 @@ func @loop_nest_modulo() { // CHECK-LABEL: func @loop_nest_tiled() -> memref<256x1024xf32> { func @loop_nest_tiled() -> memref<256x1024xf32> { %0 = alloc() : memref<256x1024xf32> - affine.for %i0 = 0 to 256 step 32 { - affine.for %i1 = 0 to 1024 step 32 { + for %i0 = 0 to 256 step 32 { + for %i1 = 0 to 1024 step 32 { // CHECK: %3 = alloc() : memref<32x32xf32, 1> // CHECK-NEXT: %4 = alloc() : memref<1xi32> // Strided DMA here: 32 x 32 tile in a 256 x 1024 memref. // CHECK-NEXT: dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024_0, %c32 : memref<256x1024xf32>, memref<32x32xf32, 1>, memref<1xi32> // CHECK-NEXT: dma_wait -// CHECK-NEXT: affine.for %i2 = #map -// CHECK-NEXT: affine.for %i3 = #map - affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) { - affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) { +// CHECK-NEXT: for %i2 = #map +// CHECK-NEXT: for %i3 = #map + for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) { + for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) { // CHECK-NEXT: %5 = affine.apply [[MAP_INDEX_DIFF_EVEN]](%i0, %i1, %i2, %i3) // CHECK-NEXT: %6 = affine.apply [[MAP_INDEX_DIFF_ODD]](%i0, %i1, %i2, %i3) // CHECK-NEXT: %7 = load %3[%5, %6] : memref<32x32xf32, 1> @@ -196,8 +196,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) { // No strided DMA needed here. 
// CHECK: dma_start %arg0[%c1, %c0], %0[%c0, %c0], %c100, %1[%c0] : memref<100x100xf32>, memref<1x100xf32, 1>, // CHECK-NEXT: dma_wait %1[%c0], %c100 : memref<1xi32> - affine.for %i = 0 to 100 { - affine.for %j = 0 to ()[s0] -> (s0) ()[%N] { + for %i = 0 to 100 { + for %j = 0 to ()[s0] -> (s0) ()[%N] { // CHECK: %2 = affine.apply [[MAP_D0_MINUS_ONE]](%c1_0, %i1) // CHECK: %3 = affine.apply [[MAP_D1]](%c1_0, %i1) // CHECK-NEXT: %4 = load %0[%2, %3] : memref<1x100xf32, 1> @@ -210,8 +210,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) { // CHECK-LABEL: func @dma_with_symbolic_accesses func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) { %N = constant 9 : index - affine.for %i = 0 to 100 { - affine.for %j = 0 to 100 { + for %i = 0 to 100 { + for %j = 0 to 100 { %idy = affine.apply (d0, d1) [s0, s1] -> (d1 + s0 + s1)(%i, %j)[%M, %N] load %A[%i, %idy] : memref<100 x 100 x f32> } @@ -221,8 +221,8 @@ func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) { // CHECK-NEXT: %2 = alloc() : memref<1xi32> // CHECK-NEXT: dma_start %arg0[%c0, %0], %1[%c0, %c0], %c10000, %2[%c0] // CHECK-NEXT: dma_wait %2[%c0], %c10000 -// CHECK-NEXT: affine.for %i0 = 0 to 100 { -// CHECK-NEXT: affine.for %i1 = 0 to 100 { +// CHECK-NEXT: for %i0 = 0 to 100 { +// CHECK-NEXT: for %i1 = 0 to 100 { // CHECK-NEXT: %3 = affine.apply [[MAP_SYM_SHIFT]](%i0, %i1)[%arg1, %c9] // CHECK-NEXT: %4 = affine.apply [[MAP_3D_D1]](%arg1, %i0, %3) // CHECK-NEXT: %5 = affine.apply [[MAP_SUB_OFFSET]](%arg1, %i0, %3) @@ -241,8 +241,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in // CHECK-NEXT: %1 = alloc() : memref<1xi32> // CHECK-NEXT: dma_start %arg0[%c0, %c0], %0[%c0, %c0], %c10000, %1[%c0] : memref<100x100xf32>, memref<100x100xf32, 1>, memref<1xi32> // CHECK-NEXT: dma_wait %1[%c0], %c10000 : memref<1xi32> - affine.for %i = 0 to 100 { - affine.for %j = %M to %N { + for %i = 0 to 100 { + for %j = %M to %N { %idy = affine.apply (d1) [s0] -> (d1 + s0)(%j)[%K] load %A[%i, %idy] : memref<100 x 100 x f32> } @@ -256,8 +256,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in func @dma_unknown_size(%arg0: memref<?x?xf32>) { %M = dim %arg0, 0 : memref<? x ? x f32> %N = dim %arg0, 0 : memref<? x ? x f32> - affine.for %i = 0 to %M { - affine.for %j = 0 to %N { + for %i = 0 to %M { + for %j = 0 to %N { // If this loop nest isn't tiled, the access requires a non-constant DMA // size -- not yet implemented. 
// CHECK: %2 = load %arg0[%i0, %i1] : memref<?x?xf32> @@ -272,9 +272,9 @@ func @dma_unknown_size(%arg0: memref<?x?xf32>) { // CHECK-LABEL: func @dma_memref_3d func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) { - affine.for %i = 0 to 1024 { - affine.for %j = 0 to 1024 { - affine.for %k = 0 to 1024 { + for %i = 0 to 1024 { + for %j = 0 to 1024 { + for %k = 0 to 1024 { %idx = affine.apply (d0) -> (d0 mod 128)(%i) %idy = affine.apply (d0) -> (d0 mod 128)(%j) %idz = affine.apply (d0) -> (d0 mod 128)(%k) @@ -308,8 +308,8 @@ func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) { // CHECK-LABEL: func @multi_load_store_union() { func @multi_load_store_union() { %A = alloc() : memref<512 x 512 x f32> - affine.for %i = 0 to 256 { - affine.for %j = 0 to 256 { + for %i = 0 to 256 { + for %j = 0 to 256 { %idx = affine.apply (d0) -> (d0 + 64)(%i) %idy = affine.apply (d0) -> (d0 + 128)(%j) %ishift = affine.apply (d0) -> (d0 + 2)(%i) @@ -333,8 +333,8 @@ func @multi_load_store_union() { // CHECK-NEXT: dma_start %0[%c2_1, %c2_2], %1[%c0, %c0], %c170372_3, %2[%c0], %c512_4, %c446_5 : memref<512x512xf32>, memref<382x446xf32, 1>, memref<1xi32> // CHECK-NEXT: dma_wait %2[%c0], %c170372_3 : memref<1xi32> // CHECK-NEXT: %3 = alloc() : memref<1xi32> -// CHECK-NEXT: affine.for %i0 = 0 to 256 { -// CHECK-NEXT: affine.for %i1 = 0 to 256 { +// CHECK-NEXT: for %i0 = 0 to 256 { +// CHECK-NEXT: for %i1 = 0 to 256 { // CHECK-NEXT: %4 = affine.apply [[MAP_PLUS_64]](%i0) // CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_128]](%i1) // CHECK-NEXT: %6 = affine.apply [[MAP_PLUS_2]](%i0) @@ -370,7 +370,7 @@ func @dma_loop_straightline_interspersed() { %c255 = constant 255 : index %A = alloc() : memref<256 x f32> %v = load %A[%c0] : memref<256 x f32> - affine.for %i = 1 to 255 { + for %i = 1 to 255 { load %A[%i] : memref<256 x f32> } %l = load %A[%c255] : memref<256 x f32> @@ -389,7 +389,7 @@ func @dma_loop_straightline_interspersed() { // CHECK-NEXT: %5 = alloc() : memref<1xi32> // CHECK-NEXT: dma_start %0[%c1_0], %4[%c0], %c254, %5[%c0] : memref<256xf32>, memref<254xf32, 1>, memref<1xi32> // CHECK-NEXT: dma_wait %5[%c0], %c254 : memref<1xi32> -// CHECK-NEXT: affine.for %i0 = 1 to 255 { +// CHECK-NEXT: for %i0 = 1 to 255 { // CHECK-NEXT: %6 = affine.apply [[MAP_MINUS_ONE]](%i0) // CHECK-NEXT: %7 = load %4[%6] : memref<254xf32, 1> // CHECK-NEXT: } @@ -410,10 +410,10 @@ func @dma_loop_straightline_interspersed() { func @dma_mixed_loop_blocks() { %c0 = constant 0 : index %A = alloc() : memref<256 x 256 x vector<8 x f32>> - affine.for %i = 0 to 256 { + for %i = 0 to 256 { %v = load %A[%c0, %c0] : memref<256 x 256 x vector<8 x f32>> "foo"(%v) : (vector<8 x f32>) -> () - affine.for %j = 0 to 256 { + for %j = 0 to 256 { %w = load %A[%i, %j] : memref<256 x 256 x vector<8 x f32>> "bar"(%w) : (vector<8 x f32>) -> () } @@ -425,7 +425,7 @@ func @dma_mixed_loop_blocks() { // CHECK-DAG: [[TAG:%[0-9]+]] = alloc() : memref<1xi32> // CHECK: dma_start [[MEM]][%c0, %c0], [[BUF]][%c0, %c0], %c65536, [[TAG]][%c0] : memref<256x256xvector<8xf32>>, memref<256x256xvector<8xf32>, 1>, memref<1xi32> // CHECK-NEXT: dma_wait [[TAG]][%c0], %c65536 : memref<1xi32> -// CHECK-NEXT: affine.for %i0 = 0 to 256 { +// CHECK-NEXT: for %i0 = 0 to 256 { // CHECK-NEXT: %3 = load [[BUF]][%c0_0, %c0_0] : memref<256x256xvector<8xf32>, 1> -// CHECK: affine.for %i1 = 0 to 256 { +// CHECK: for %i1 = 0 to 256 { // CHECK-NEXT: %4 = load [[BUF]][%i0, %i1] : memref<256x256xvector<8xf32>, 1> diff --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir 
index 7fbf7097be3..439e93137a4 100644 --- a/mlir/test/Transforms/loop-fusion.mlir +++ b/mlir/test/Transforms/loop-fusion.mlir @@ -16,13 +16,13 @@ func @should_fuse_raw_dep_for_locality() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> } - // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %0[%1] : memref<1xf32> // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0, %i0) @@ -44,23 +44,23 @@ func @should_fuse_reduction_to_pointwise() { %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to 10 { + for %i0 = 0 to 10 { + for %i1 = 0 to 10 { %v0 = load %b[%i0] : memref<10xf32> %v1 = load %a[%i0, %i1] : memref<10x10xf32> %v3 = addf %v0, %v1 : f32 store %v3, %b[%i0] : memref<10xf32> } } - affine.for %i2 = 0 to 10 { + for %i2 = 0 to 10 { %v4 = load %b[%i2] : memref<10xf32> store %v4, %c[%i2] : memref<10xf32> } // Should fuse in entire inner loop on %i1 from source loop nest, as %i1 // is not used in the access function of the store/load on %b. - // CHECK: affine.for %i0 = 0 to 10 { - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %4 = load %0[%3] : memref<1xf32> // CHECK-NEXT: %5 = load %1[%i0, %i1] : memref<10x10xf32> @@ -88,15 +88,15 @@ func @should_fuse_loop_nests_with_shifts() { %a = alloc() : memref<10x10xf32> %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 9 { - affine.for %i1 = 0 to 9 { + for %i0 = 0 to 9 { + for %i1 = 0 to 9 { %idx = affine.apply (d0) -> (d0 + 1) (%i0) %idy = affine.apply (d0) -> (d0 + 1) (%i1) store %cf7, %a[%idx, %idy] : memref<10x10xf32> } } - affine.for %i2 = 1 to 10 { - affine.for %i3 = 1 to 10 { + for %i2 = 1 to 10 { + for %i3 = 1 to 10 { %v0 = load %a[%i2, %i3] : memref<10x10xf32> } } @@ -109,8 +109,8 @@ func @should_fuse_loop_nests_with_shifts() { // *) Fifth affine apply shifts the loads access function by '-1', because // of the offset induced by reducing the memref shape from 10x10 to 9x9. // NOTE: Should create a private memref with reduced shape 9x9xf32. - // CHECK: affine.for %i0 = 1 to 10 { - // CHECK-NEXT: affine.for %i1 = 1 to 10 { + // CHECK: for %i0 = 1 to 10 { + // CHECK-NEXT: for %i1 = 1 to 10 { // CHECK-NEXT: %1 = affine.apply [[MAP_SHIFT_MINUS_ONE_R1]](%i0) // CHECK-NEXT: %2 = affine.apply [[MAP_SHIFT_MINUS_ONE_R1]](%i1) // CHECK-NEXT: %3 = affine.apply [[MAP_SHIFT_BY_ONE]](%1) @@ -138,27 +138,27 @@ func @should_fuse_loop_nest() { %b = alloc() : memref<10x10xf32> %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to 10 { + for %i0 = 0 to 10 { + for %i1 = 0 to 10 { store %cf7, %a[%i0, %i1] : memref<10x10xf32> } } - affine.for %i2 = 0 to 10 { - affine.for %i3 = 0 to 10 { + for %i2 = 0 to 10 { + for %i3 = 0 to 10 { %v0 = load %a[%i3, %i2] : memref<10x10xf32> store %v0, %b[%i2, %i3] : memref<10x10xf32> } } - affine.for %i4 = 0 to 10 { - affine.for %i5 = 0 to 10 { + for %i4 = 0 to 10 { + for %i5 = 0 to 10 { %v1 = load %b[%i4, %i5] : memref<10x10xf32> } } // Expecting private memref for '%a' first, then private memref for '%b'. 
// CHECK-DAG: [[NEWA:%[0-9]+]] = alloc() : memref<1x1xf32> // CHECK-DAG: [[NEWB:%[0-9]+]] = alloc() : memref<1x1xf32> - // CHECK: affine.for %i0 = 0 to 10 { - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: %2 = affine.apply [[MAP_D2_D0_DIFF]](%i1, %i0, %i1, %i0) // CHECK-NEXT: %3 = affine.apply [[MAP_D3_D1_DIFF]](%i1, %i0, %i1, %i0) // CHECK-NEXT: store %cst, [[NEWA]][%2, %3] : memref<1x1xf32> @@ -189,23 +189,23 @@ func @should_fuse_across_intermediate_loop_with_no_deps() { %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { %v0 = load %a[%i0] : memref<10xf32> store %v0, %b[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { store %cf7, %c[%i1] : memref<10xf32> } - affine.for %i2 = 0 to 10 { + for %i2 = 0 to 10 { %v1 = load %b[%i2] : memref<10xf32> } // Should fuse first loop (past second loop with no dependences) into third. // Note that fusion creates a private memref '%2' for the fused loop nest. - // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %2[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: affine.for %i1 = 0 to 10 { + // CHECK: for %i1 = 0 to 10 { // CHECK-NEXT: %3 = load %1[%i1] : memref<10xf32> // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i1, %i1) // CHECK-NEXT: store %3, %0[%4] : memref<1xf32> @@ -227,13 +227,13 @@ func @should_fuse_all_loops() { %cf7 = constant 7.0 : f32 // Set up flow dependences from first and second loops to third. - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %a[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { store %cf7, %b[%i1] : memref<10xf32> } - affine.for %i2 = 0 to 10 { + for %i2 = 0 to 10 { %v0 = load %a[%i2] : memref<10xf32> %v1 = load %b[%i2] : memref<10xf32> } @@ -242,7 +242,7 @@ func @should_fuse_all_loops() { // Expecting private memref for '%a' first, then private memref for '%b'. // CHECK-DAG: [[NEWA:%[0-9]+]] = alloc() : memref<1xf32> // CHECK-DAG: [[NEWB:%[0-9]+]] = alloc() : memref<1xf32> - // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, [[NEWA]][%2] : memref<1xf32> // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) @@ -268,27 +268,27 @@ func @should_fuse_first_and_second_loops() { %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %a[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %v0 = load %a[%i1] : memref<10xf32> store %cf7, %b[%i1] : memref<10xf32> } - affine.for %i2 = 0 to 10 { + for %i2 = 0 to 10 { %v1 = load %c[%i2] : memref<10xf32> } // Should fuse first loop into the second (last loop should not be fused). // Should create private memref '%2' for fused loop. 
- // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %0[%3] : memref<1xf32> // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %5 = load %0[%4] : memref<1xf32> // CHECK-NEXT: store %cst, %1[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: affine.for %i1 = 0 to 10 { + // CHECK: for %i1 = 0 to 10 { // CHECK-NEXT: %6 = load %2[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -310,28 +310,28 @@ func @should_not_fuse_would_create_cycle() { // 1) loop0 -> loop1 on memref '%a' // 2) loop0 -> loop2 on memref '%b' // 3) loop1 -> loop2 on memref '%c' - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { %v0 = load %a[%i0] : memref<10xf32> store %cf7, %b[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { store %cf7, %a[%i1] : memref<10xf32> %v1 = load %c[%i1] : memref<10xf32> } - affine.for %i2 = 0 to 10 { + for %i2 = 0 to 10 { %v2 = load %b[%i2] : memref<10xf32> store %cf7, %c[%i2] : memref<10xf32> } // Should not fuse: fusing loop first loop into last would create a cycle. - // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: %3 = load %0[%i0] : memref<10xf32> // CHECK-NEXT: store %cst, %1[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: affine.for %i1 = 0 to 10 { + // CHECK: for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i1] : memref<10xf32> // CHECK-NEXT: %4 = load %2[%i1] : memref<10xf32> // CHECK-NEXT: } - // CHECK: affine.for %i2 = 0 to 10 { + // CHECK: for %i2 = 0 to 10 { // CHECK-NEXT: %5 = load %1[%i2] : memref<10xf32> // CHECK-NEXT: store %cst, %2[%i2] : memref<10xf32> // CHECK-NEXT: } @@ -346,23 +346,23 @@ func @should_not_fuse_across_waw_dep() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { store %cf7, %m[%i1] : memref<10xf32> } - affine.for %i2 = 0 to 10 { + for %i2 = 0 to 10 { %v1 = load %m[%i2] : memref<10xf32> } // Fusing loop %i0 to %i2 would violate the WAW dependence between %i0 and %i1 - // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: affine.for %i1 = 0 to 10 { + // CHECK: for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i1] : memref<10xf32> // CHECK-NEXT: } - // CHECK: affine.for %i2 = 0 to 10 { + // CHECK: for %i2 = 0 to 10 { // CHECK-NEXT: %1 = load %0[%i2] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -379,27 +379,27 @@ func @should_fuse_and_move_to_preserve_war_dep() { %b = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { %v0 = load %a[%i0] : memref<10xf32> store %v0, %b[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { store %cf7, %a[%i1] : memref<10xf32> } - affine.for %i2 = 0 to 10 { + for %i2 = 0 to 10 { %v1 = load %b[%i2] : memref<10xf32> } // Loops '%i1' and '%i2' have no dependences. We can fuse a slice of '%i0' // into '%i2' if we move the fused loop nest before '%i1', which preserves // the WAR dependence from load '%a' in '%i0' to the store '%a' in loop '%i1'. 
- // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: %2 = load %1[%i0] : memref<10xf32> // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %2, %0[%3] : memref<1xf32> // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %5 = load %0[%4] : memref<1xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %1[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -415,20 +415,20 @@ func @should_fuse_with_private_memref_if_top_level_access() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> } %c0 = constant 4 : index %v1 = load %m[%c0] : memref<10xf32> // Top-level load to '%m' should prevent fusion. - // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %1[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i1, %i1) // CHECK-NEXT: store %cst, %0[%2] : memref<1xf32> // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i1, %i1) @@ -446,13 +446,13 @@ func @should_fuse_no_top_level_access() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> } - // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %0[%1] : memref<1xf32> // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0, %i0) @@ -471,20 +471,20 @@ func @should_not_fuse_if_inst_at_top_level() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> } %c0 = constant 4 : index affine.if #set0(%c0) { } // Top-level IfOp should prevent fusion. - // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: affine.for %i1 = 0 to 10 { + // CHECK: for %i1 = 0 to 10 { // CHECK-NEXT: %1 = load %0[%i1] : memref<10xf32> // CHECK-NEXT: } return @@ -500,20 +500,20 @@ func @should_not_fuse_if_inst_in_loop_nest() { %cf7 = constant 7.0 : f32 %c4 = constant 4 : index - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { affine.if #set0(%c4) { } %v0 = load %m[%i1] : memref<10xf32> } // IfOp in ForInst should prevent fusion. 
- // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: affine.for %i1 = 0 to 10 { + // CHECK: for %i1 = 0 to 10 { // CHECK-NEXT: affine.if #set0(%c4) { // CHECK-NEXT: } // CHECK-NEXT: %1 = load %0[%i1] : memref<10xf32> @@ -532,24 +532,24 @@ func @permute_and_fuse() { %m = alloc() : memref<10x20x30xf32> %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to 20 { - affine.for %i2 = 0 to 30 { + for %i0 = 0 to 10 { + for %i1 = 0 to 20 { + for %i2 = 0 to 30 { store %cf7, %m[%i0, %i1, %i2] : memref<10x20x30xf32> } } } - affine.for %i3 = 0 to 30 { - affine.for %i4 = 0 to 10 { - affine.for %i5 = 0 to 20 { + for %i3 = 0 to 30 { + for %i4 = 0 to 10 { + for %i5 = 0 to 20 { %v0 = load %m[%i4, %i5, %i3] : memref<10x20x30xf32> "foo"(%v0) : (f32) -> () } } } -// CHECK: affine.for %i0 = 0 to 30 { -// CHECK-NEXT: affine.for %i1 = 0 to 10 { -// CHECK-NEXT: affine.for %i2 = 0 to 20 { +// CHECK: for %i0 = 0 to 30 { +// CHECK-NEXT: for %i1 = 0 to 10 { +// CHECK-NEXT: for %i2 = 0 to 20 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i1, %i2, %i0, %i1, %i2, %i0) // CHECK-NEXT: %2 = affine.apply [[MAP1]](%i1, %i2, %i0, %i1, %i2, %i0) // CHECK-NEXT: %3 = affine.apply [[MAP2]](%i1, %i2, %i0, %i1, %i2, %i0) @@ -578,22 +578,22 @@ func @permute_and_fuse() { func @fuse_reshape_64_16_4(%in : memref<64xf32>) { %out = alloc() : memref<16x4xf32> - affine.for %i0 = 0 to 64 { + for %i0 = 0 to 64 { %v = load %in[%i0] : memref<64xf32> %idx = affine.apply (d0) -> (d0 floordiv 4) (%i0) %idy = affine.apply (d0) -> (d0 mod 4) (%i0) store %v, %out[%idx, %idy] : memref<16x4xf32> } - affine.for %i1 = 0 to 16 { - affine.for %i2 = 0 to 4 { + for %i1 = 0 to 16 { + for %i2 = 0 to 4 { %w = load %out[%i1, %i2] : memref<16x4xf32> "foo"(%w) : (f32) -> () } } return - // CHECK: affine.for %i0 = - // CHECK-NEXT: affine.for %i1 = + // CHECK: for %i0 = + // CHECK-NEXT: for %i1 = // CHECK-NOT: for // CHECK: } // CHECK-NEXT: } @@ -612,19 +612,19 @@ func @fuse_reshape_16_4_64() { %in = alloc() : memref<16x4xf32> %out = alloc() : memref<64xf32> - affine.for %i0 = 0 to 16 { - affine.for %i1 = 0 to 4 { + for %i0 = 0 to 16 { + for %i1 = 0 to 4 { %v = load %in[%i0, %i1] : memref<16x4xf32> %idx = affine.apply (d0, d1) -> (4*d0 + d1) (%i0, %i1) store %v, %out[%idx] : memref<64xf32> } } - affine.for %i2 = 0 to 64 { + for %i2 = 0 to 64 { %w = load %out[%i2] : memref<64xf32> "foo"(%w) : (f32) -> () } -// CHECK: affine.for %i0 = 0 to 64 { +// CHECK: for %i0 = 0 to 64 { // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0) // CHECK-NEXT: %3 = affine.apply [[MAP1]](%i0) // CHECK-NEXT: %4 = load %1[%2, %3] : memref<16x4xf32> @@ -650,12 +650,12 @@ func @R6_to_R2_reshape_square() -> memref<64x9xi32> { %live_out = alloc() : memref<64x9xi32> // Initialize input. - affine.for %i0 = 0 to 2 { - affine.for %i1 = 0 to 2 { - affine.for %i2 = 0 to 3 { - affine.for %i3 = 0 to 3 { - affine.for %i4 = 0 to 16 { - affine.for %i5 = 0 to 1 { + for %i0 = 0 to 2 { + for %i1 = 0 to 2 { + for %i2 = 0 to 3 { + for %i3 = 0 to 3 { + for %i4 = 0 to 16 { + for %i5 = 0 to 1 { %val = "foo"(%i0, %i1, %i2, %i3, %i4, %i5) : (index, index, index, index, index, index) -> i32 store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32> } @@ -665,8 +665,8 @@ func @R6_to_R2_reshape_square() -> memref<64x9xi32> { } } - affine.for %ii = 0 to 64 { - affine.for %jj = 0 to 9 { + for %ii = 0 to 64 { + for %jj = 0 to 9 { // Convert output coordinates to linear index. 
%a0 = affine.apply (d0, d1) -> (d0 * 9 + d1) (%ii, %jj) %0 = affine.apply (d0) -> (d0 floordiv (2 * 3 * 3 * 16 * 1))(%a0) @@ -680,8 +680,8 @@ func @R6_to_R2_reshape_square() -> memref<64x9xi32> { } } - affine.for %i = 0 to 64 { - affine.for %j = 0 to 9 { + for %i = 0 to 64 { + for %j = 0 to 9 { %a = load %out[%i, %j] : memref<64x9xi32> %b = muli %a, %a : i32 store %b, %live_out[%i, %j] : memref<64x9xi32> @@ -717,8 +717,8 @@ func @R6_to_R2_reshape_square() -> memref<64x9xi32> { // CHECK: %0 = alloc() : memref<1x2x3x3x16x1xi32> // CHECK: %1 = alloc() : memref<1x1xi32> // CHECK: %2 = alloc() : memref<64x9xi32> -// CHECK-NEXT: affine.for %i0 = 0 to 64 { -// CHECK-NEXT: affine.for %i1 = 0 to 9 { +// CHECK-NEXT: for %i0 = 0 to 64 { +// CHECK-NEXT: for %i1 = 0 to 9 { // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i1) // CHECK-NEXT: %4 = affine.apply [[MAP1]](%i0, %i1) // CHECK-NEXT: %5 = affine.apply [[MAP2]](%i0, %i1) @@ -768,14 +768,14 @@ func @fuse_symbolic_bounds(%M : index, %N : index) { %c0 = constant 0.0 : f32 %s = constant 5 : index - affine.for %i0 = 0 to %M { - affine.for %i1 = 0 to (d0) -> (d0 + 5) (%N) { + for %i0 = 0 to %M { + for %i1 = 0 to (d0) -> (d0 + 5) (%N) { store %c0, %m[%i0, %i1] : memref<? x ? x f32> } } - affine.for %i2 = 0 to %M { - affine.for %i3 = 0 to %N { + for %i2 = 0 to %M { + for %i3 = 0 to %N { %idy = affine.apply (d0)[s0] -> (d0 + s0) (%i3)[%s] %v = load %m[%i2, %idy] : memref<? x ? x f32> } @@ -792,16 +792,16 @@ func @should_fuse_reduction_at_depth1() { %a = alloc() : memref<10x100xf32> %b = alloc() : memref<10xf32> - affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to 100 { + for %i0 = 0 to 10 { + for %i1 = 0 to 100 { %v0 = load %b[%i0] : memref<10xf32> %v1 = load %a[%i0, %i1] : memref<10x100xf32> %v2 = "maxf"(%v0, %v1) : (f32, f32) -> f32 store %v2, %b[%i0] : memref<10xf32> } } - affine.for %i2 = 0 to 10 { - affine.for %i3 = 0 to 100 { + for %i2 = 0 to 10 { + for %i3 = 0 to 100 { %v3 = load %b[%i2] : memref<10xf32> %v4 = load %a[%i2, %i3] : memref<10x100xf32> %v5 = subf %v4, %v3 : f32 @@ -812,8 +812,8 @@ func @should_fuse_reduction_at_depth1() { // loop nest, which improves locality and enables subsequence passes to // decrease the reduction memref size and possibly place it in a faster // memory space. 
- // CHECK: affine.for %i0 = 0 to 10 { - // CHECK-NEXT: affine.for %i1 = 0 to 100 { + // CHECK: for %i0 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 100 { // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %3 = load %0[%2] : memref<1xf32> // CHECK-NEXT: %4 = load %1[%i0, %i1] : memref<10x100xf32> @@ -821,7 +821,7 @@ func @should_fuse_reduction_at_depth1() { // CHECK-NEXT: %6 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %5, %0[%6] : memref<1xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i2 = 0 to 100 { + // CHECK-NEXT: for %i2 = 0 to 100 { // CHECK-NEXT: %7 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %8 = load %0[%7] : memref<1xf32> // CHECK-NEXT: %9 = load %1[%i0, %i2] : memref<10x100xf32> @@ -843,19 +843,19 @@ func @should_fuse_at_src_depth1_and_dst_depth1() { %a = alloc() : memref<100x16xf32> %b = alloc() : memref<100x16xf32> - affine.for %i0 = 0 to 100 { - affine.for %i1 = 0 to 16 { + for %i0 = 0 to 100 { + for %i1 = 0 to 16 { %v0 = load %a[%i0, %i1] : memref<100x16xf32> "op0"(%v0) : (f32) -> () } - affine.for %i2 = 0 to 16 { + for %i2 = 0 to 16 { %v1 = "op1"() : () -> (f32) store %v1, %b[%i0, %i2] : memref<100x16xf32> } } - affine.for %i3 = 0 to 100 { - affine.for %i4 = 0 to 16 { + for %i3 = 0 to 100 { + for %i4 = 0 to 16 { %v2 = load %b[%i3, %i4] : memref<100x16xf32> "op2"(%v2) : (f32) -> () } @@ -865,18 +865,18 @@ func @should_fuse_at_src_depth1_and_dst_depth1() { // destination loop nest at depth2 causes extra computation. Instead, // the fusion algorithm should detect that the source loop should be sliced // at depth 1 and the slice should be inserted at depth 1. - // CHECK: affine.for %i0 = 0 to 100 { - // CHECK-NEXT: affine.for %i1 = 0 to 16 { + // CHECK: for %i0 = 0 to 100 { + // CHECK-NEXT: for %i1 = 0 to 16 { // CHECK-NEXT: %2 = load %1[%i0, %i1] : memref<100x16xf32> // CHECK-NEXT: "op0"(%2) : (f32) -> () // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i2 = 0 to 16 { + // CHECK-NEXT: for %i2 = 0 to 16 { // CHECK-NEXT: %3 = "op1"() : () -> f32 // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i0, %i0, %i2) // CHECK-NEXT: %5 = affine.apply [[MAP1]](%i0, %i0, %i2) // CHECK-NEXT: store %3, %0[%4, %5] : memref<1x16xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i3 = 0 to 16 { + // CHECK-NEXT: for %i3 = 0 to 16 { // CHECK-NEXT: %6 = affine.apply [[MAP0]](%i0, %i0, %i3) // CHECK-NEXT: %7 = affine.apply [[MAP1]](%i0, %i0, %i3) // CHECK-NEXT: %8 = load %0[%6, %7] : memref<1x16xf32> @@ -896,20 +896,20 @@ func @should_fuse_src_depth1_at_dst_depth2() { %a = alloc() : memref<100xf32> %c0 = constant 0.0 : f32 - affine.for %i0 = 0 to 100 { + for %i0 = 0 to 100 { store %c0, %a[%i0] : memref<100xf32> } - affine.for %i1 = 0 to 10 { - affine.for %i2 = 0 to 10 { + for %i1 = 0 to 10 { + for %i2 = 0 to 10 { %a0 = affine.apply (d0, d1) -> (d0 * 10 + d1) (%i1, %i2) %v0 = load %a[%a0] : memref<100xf32> } } // The source loop nest slice loop bound is a function of both destination // loop IVs, so we should slice at depth 1 and insert the slice at depth 2. 
- // CHECK: affine.for %i0 = 0 to 10 { - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1) // CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1, %1) // CHECK-NEXT: store %cst, %0[%2] : memref<1xf32> @@ -930,10 +930,10 @@ func @fusion_at_depth0_not_currently_supported() { %0 = alloc() : memref<10xf32> %c0 = constant 0 : index %cst = constant 0.000000e+00 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cst, %0[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %1 = load %0[%c0] : memref<10xf32> } // NOTE: Should shrink memref size to 1 element access by load in dst loop @@ -966,18 +966,18 @@ func @should_fuse_deep_loop_nests() { %c1 = constant 1 : index %c1_0 = constant 1 : index %cst = constant 0.000000e+00 : f32 - affine.for %i0 = 0 to 2 { - affine.for %i1 = 0 to 2 { - affine.for %i2 = 0 to 3 { - affine.for %i3 = 0 to 3 { - affine.for %i4 = 0 to 16 { - affine.for %i5 = 0 to 10 { + for %i0 = 0 to 2 { + for %i1 = 0 to 2 { + for %i2 = 0 to 3 { + for %i3 = 0 to 3 { + for %i4 = 0 to 16 { + for %i5 = 0 to 10 { %3 = load %0[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x10xf32, 2> } } - affine.for %i6 = 0 to 16 { - affine.for %i7 = 0 to 10 { + for %i6 = 0 to 16 { + for %i7 = 0 to 10 { store %cst, %1[%i0, %i1, %i2, %i3, %i6, %i7] : memref<2x2x3x3x16x10xf32, 2> } @@ -986,22 +986,22 @@ func @should_fuse_deep_loop_nests() { } } } - affine.for %i8 = 0 to 3 { - affine.for %i9 = 0 to 3 { - affine.for %i10 = 0 to 2 { - affine.for %i11 = 0 to 2 { - affine.for %i12 = 0 to 3 { - affine.for %i13 = 0 to 3 { - affine.for %i14 = 0 to 2 { - affine.for %i15 = 0 to 2 { - affine.for %i16 = 0 to 16 { - affine.for %i17 = 0 to 10 { + for %i8 = 0 to 3 { + for %i9 = 0 to 3 { + for %i10 = 0 to 2 { + for %i11 = 0 to 2 { + for %i12 = 0 to 3 { + for %i13 = 0 to 3 { + for %i14 = 0 to 2 { + for %i15 = 0 to 2 { + for %i16 = 0 to 16 { + for %i17 = 0 to 10 { %5 = load %0[%i14, %i15, %i12, %i13, %i16, %i17] : memref<2x2x3x3x16x10xf32, 2> } } - affine.for %i18 = 0 to 16 { - affine.for %i19 = 0 to 10 { + for %i18 = 0 to 16 { + for %i19 = 0 to 10 { %6 = load %1[%i10, %i11, %i8, %i9, %i18, %i19] : memref<2x2x3x3x16x10xf32, 2> } @@ -1019,19 +1019,19 @@ func @should_fuse_deep_loop_nests() { // where the destination loops nests have been interchanged. 
// CHECK-DAG: %0 = alloc() : memref<1x1x1x1x16x10xf32, 2> -// CHECK: affine.for %i0 = 0 to 3 { -// CHECK-NEXT: affine.for %i1 = 0 to 3 { -// CHECK-NEXT: affine.for %i2 = 0 to 2 { -// CHECK-NEXT: affine.for %i3 = 0 to 2 { -// CHECK-NEXT: affine.for %i4 = 0 to 3 { -// CHECK-NEXT: affine.for %i5 = 0 to 3 { -// CHECK-NEXT: affine.for %i6 = 0 to 16 { -// CHECK-NEXT: affine.for %i7 = 0 to 10 { +// CHECK: for %i0 = 0 to 3 { +// CHECK-NEXT: for %i1 = 0 to 3 { +// CHECK-NEXT: for %i2 = 0 to 2 { +// CHECK-NEXT: for %i3 = 0 to 2 { +// CHECK-NEXT: for %i4 = 0 to 3 { +// CHECK-NEXT: for %i5 = 0 to 3 { +// CHECK-NEXT: for %i6 = 0 to 16 { +// CHECK-NEXT: for %i7 = 0 to 10 { // CHECK-NEXT: %3 = load %1[%i2, %i3, %i0, %i1, %i6, %i7] : memref<2x2x3x3x16x10xf32, 2> // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: affine.for %i8 = 0 to 16 { -// CHECK-NEXT: affine.for %i9 = 0 to 10 { +// CHECK-NEXT: for %i8 = 0 to 16 { +// CHECK-NEXT: for %i9 = 0 to 10 { // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i8, %i9) // CHECK-NEXT: %5 = affine.apply [[MAP1]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i8, %i9) // CHECK-NEXT: %6 = affine.apply [[MAP2]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i8, %i9) @@ -1041,15 +1041,15 @@ func @should_fuse_deep_loop_nests() { // CHECK-NEXT: store %cst, %0[%4, %5, %6, %7, %8, %9] : memref<1x1x1x1x16x10xf32, 2> // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: affine.for %i10 = 0 to 2 { -// CHECK-NEXT: affine.for %i11 = 0 to 2 { -// CHECK-NEXT: affine.for %i12 = 0 to 16 { -// CHECK-NEXT: affine.for %i13 = 0 to 10 { +// CHECK-NEXT: for %i10 = 0 to 2 { +// CHECK-NEXT: for %i11 = 0 to 2 { +// CHECK-NEXT: for %i12 = 0 to 16 { +// CHECK-NEXT: for %i13 = 0 to 10 { // CHECK-NEXT: %10 = load %1[%i10, %i11, %i4, %i5, %i12, %i13] : memref<2x2x3x3x16x10xf32, 2> // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: affine.for %i14 = 0 to 16 { -// CHECK-NEXT: affine.for %i15 = 0 to 10 { +// CHECK-NEXT: for %i14 = 0 to 16 { +// CHECK-NEXT: for %i15 = 0 to 10 { // CHECK-NEXT: %11 = affine.apply [[MAP0]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i14, %i15) // CHECK-NEXT: %12 = affine.apply [[MAP1]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i14, %i15) // CHECK-NEXT: %13 = affine.apply [[MAP2]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i14, %i15) @@ -1083,17 +1083,17 @@ func @should_fuse_at_depth1_and_reduce_slice_trip_count() { %c0 = constant 0 : index %cf0 = constant 0.0 : f32 - affine.for %i0 = 0 to 4 { - affine.for %i1 = 0 to 256 { + for %i0 = 0 to 4 { + for %i1 = 0 to 256 { %v0 = load %b[%i0, %i1] : memref<4x256xf32> } - affine.for %i2 = 0 to 256 { + for %i2 = 0 to 256 { store %cf0, %a[%i0, %i2] : memref<4x256xf32> } } - affine.for %d0 = 0 to 4 { - affine.for %d1 = 0 to 16 { + for %d0 = 0 to 4 { + for %d1 = 0 to 16 { %v1 = load %a[%d0, %d1] : memref<4x256xf32> } } @@ -1107,16 +1107,16 @@ func @should_fuse_at_depth1_and_reduce_slice_trip_count() { // is reduced from the original shape from 4x256 to 4x16 because of the // data accessed by the load. 
// CHECK-DAG: %0 = alloc() : memref<1x16xf32> - // CHECK: affine.for %i0 = 0 to 4 { - // CHECK-NEXT: affine.for %i1 = 0 to 256 { + // CHECK: for %i0 = 0 to 4 { + // CHECK-NEXT: for %i1 = 0 to 256 { // CHECK-NEXT: %2 = load %1[%i0, %i1] : memref<4x256xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i2 = 0 to 16 { + // CHECK-NEXT: for %i2 = 0 to 16 { // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0, %i2) // CHECK-NEXT: %4 = affine.apply [[MAP1]](%i0, %i0, %i2) // CHECK-NEXT: store %cst, %0[%3, %4] : memref<1x16xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i3 = 0 to 16 { + // CHECK-NEXT: for %i3 = 0 to 16 { // CHECK-NEXT: %5 = affine.apply [[MAP0]](%i0, %i0, %i3) // CHECK-NEXT: %6 = affine.apply [[MAP1]](%i0, %i0, %i3) // CHECK-NEXT: %7 = load %0[%5, %6] : memref<1x16xf32> @@ -1134,31 +1134,31 @@ func @should_fuse_at_depth1_with_trip_count_20() { %c0 = constant 0 : index %cf0 = constant 0.0 : f32 - affine.for %i0 = 0 to 100 { + for %i0 = 0 to 100 { store %cf0, %a[%i0]: memref<100xf32> } - affine.for %i1 = 0 to 5 { - affine.for %i2 = 0 to 10 { + for %i1 = 0 to 5 { + for %i2 = 0 to 10 { %v0 = load %a[%i2]: memref<100xf32> } - affine.for %i3 = 0 to 10 { - affine.for %i4 = 0 to 20 { + for %i3 = 0 to 10 { + for %i4 = 0 to 20 { %v1 = load %a[%i4]: memref<100xf32> } } } // NOTE: The size of the private memref created for fusion is shrunk to 20xf32 // CHECK-DAG: %0 = alloc() : memref<20xf32> - // CHECK: affine.for %i0 = 0 to 5 { - // CHECK-NEXT: affine.for %i1 = 0 to 20 { + // CHECK: for %i0 = 0 to 5 { + // CHECK-NEXT: for %i1 = 0 to 20 { // CHECK-NEXT: store %cst, %0[%i1] : memref<20xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i2 = 0 to 10 { + // CHECK-NEXT: for %i2 = 0 to 10 { // CHECK-NEXT: %1 = load %0[%i2] : memref<20xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i3 = 0 to 10 { - // CHECK-NEXT: affine.for %i4 = 0 to 20 { + // CHECK-NEXT: for %i3 = 0 to 10 { + // CHECK-NEXT: for %i4 = 0 to 20 { // CHECK-NEXT: %2 = load %0[%i4] : memref<20xf32> // CHECK-NEXT: } // CHECK-NEXT: } @@ -1175,31 +1175,31 @@ func @should_fuse_at_depth1_with_trip_count_19() { %c0 = constant 0 : index %cf0 = constant 0.0 : f32 - affine.for %i0 = 0 to 100 { + for %i0 = 0 to 100 { store %cf0, %a[%i0]: memref<100xf32> } - affine.for %i1 = 0 to 5 { - affine.for %i2 = 0 to 19 { + for %i1 = 0 to 5 { + for %i2 = 0 to 19 { %v0 = load %a[%i2]: memref<100xf32> } - affine.for %i3 = 0 to 10 { - affine.for %i4 = 0 to 10 { + for %i3 = 0 to 10 { + for %i4 = 0 to 10 { %v1 = load %a[%i4]: memref<100xf32> } } } // NOTE: The size of the private memref created for fusion is shrunk to 19xf32 // CHECK-DAG: %0 = alloc() : memref<19xf32> - // CHECK: affine.for %i0 = 0 to 5 { - // CHECK-NEXT: affine.for %i1 = 0 to 19 { + // CHECK: for %i0 = 0 to 5 { + // CHECK-NEXT: for %i1 = 0 to 19 { // CHECK-NEXT: store %cst, %0[%i1] : memref<19xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i2 = 0 to 19 { + // CHECK-NEXT: for %i2 = 0 to 19 { // CHECK-NEXT: %1 = load %0[%i2] : memref<19xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i3 = 0 to 10 { - // CHECK-NEXT: affine.for %i4 = 0 to 10 { + // CHECK-NEXT: for %i3 = 0 to 10 { + // CHECK-NEXT: for %i4 = 0 to 10 { // CHECK-NEXT: %2 = load %0[%i4] : memref<19xf32> // CHECK-NEXT: } // CHECK-NEXT: } @@ -1217,26 +1217,26 @@ func @should_fuse_with_private_memrefs_with_diff_shapes() { %m = alloc() : memref<100xf32> %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 100 { + for %i0 = 0 to 100 { store %cf7, %m[%i0] : memref<100xf32> } - affine.for %i1 = 0 to 17 { + for %i1 = 0 to 17 { %v0 = 
load %m[%i1] : memref<100xf32> } - affine.for %i2 = 0 to 82 { + for %i2 = 0 to 82 { %v1 = load %m[%i2] : memref<100xf32> } // Should create two new private memrefs customized to the shapes accessed // by loops %i1 and %i2. // CHECK-DAG: %0 = alloc() : memref<1xf32> // CHECK-DAG: %1 = alloc() : memref<1xf32> - // CHECK: affine.for %i0 = 0 to 82 { + // CHECK: for %i0 = 0 to 82 { // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %1[%2] : memref<1xf32> // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %4 = load %1[%3] : memref<1xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i1 = 0 to 17 { + // CHECK-NEXT: for %i1 = 0 to 17 { // CHECK-NEXT: %5 = affine.apply [[MAP0]](%i1, %i1) // CHECK-NEXT: store %cst, %0[%5] : memref<1xf32> // CHECK-NEXT: %6 = affine.apply [[MAP0]](%i1, %i1) @@ -1252,18 +1252,18 @@ func @should_fuse_with_private_memrefs_with_diff_shapes() { func @should_not_fuse_live_out_arg(%arg0: memref<10xf32>) { %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %arg0[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %v0 = load %arg0[%i1] : memref<10xf32> } // This tests that the loop nest '%i0' should not be removed after fusion // because it writes to memref argument '%arg0'. - // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %arg0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: %0 = load %arg0[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -1276,19 +1276,19 @@ func @should_not_fuse_live_out_arg(%arg0: memref<10xf32>) { func @should_not_fuse_escaping_memref() -> memref<10xf32> { %cf7 = constant 7.0 : f32 %m = alloc() : memref<10xf32> - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> } // This tests that the loop nest '%i0' should not be removed after fusion // because it writes to memref '%m' which is returned by the function. 
// CHECK-DAG: %0 = alloc() : memref<10xf32> - // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: %1 = load %0[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return %0 : memref<10xf32> @@ -1303,17 +1303,17 @@ func @R3_to_R2_reshape() { %c0 = constant 0 : index - affine.for %i0 = 0 to 2 { - affine.for %i1 = 0 to 3 { - affine.for %i2 = 0 to 16 { + for %i0 = 0 to 2 { + for %i1 = 0 to 3 { + for %i2 = 0 to 16 { %val = "foo"(%i0, %i1, %i2) : (index, index, index) -> i32 store %val, %in[%i0, %i1, %i2] : memref<2x3x16xi32> } } } - affine.for %ii = 0 to 32 { - affine.for %jj = 0 to 3 { + for %ii = 0 to 32 { + for %jj = 0 to 3 { %a0 = affine.apply (d0, d1) -> (d0 * 3 + d1) (%ii, %jj) %idx = affine.apply (d0) -> (d0 floordiv (3 * 16)) (%a0) %v = load %in[%idx, %jj, %c0] @@ -1332,8 +1332,8 @@ func @R3_to_R2_reshape() { // CHECK-LABEL: func @R3_to_R2_reshape() // CHECK-DAG: %0 = alloc() : memref<1x1x1xi32> -// CHECK: affine.for %i0 = 0 to 32 { -// CHECK-NEXT: affine.for %i1 = 0 to 3 { +// CHECK: for %i0 = 0 to 32 { +// CHECK-NEXT: for %i1 = 0 to 3 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1) // CHECK-NEXT: %2 = affine.apply [[MAP1]]()[%c0] // CHECK-NEXT: %3 = "foo"(%1, %i1, %2) : (index, index, index) -> i32 @@ -1360,19 +1360,19 @@ func @should_not_fuse_multi_output_producer() { %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %a[%i0] : memref<10xf32> store %cf7, %b[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %v0 = load %a[%i1] : memref<10xf32> } - // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: store %cst, %1[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: %2 = load %0[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -1389,30 +1389,30 @@ func @fusion_preventing_deps_on_middle_loop() { %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { %v0 = load %a[%i0] : memref<10xf32> store %v0, %b[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { store %cf7, %a[%i1] : memref<10xf32> %v1 = load %c[%i1] : memref<10xf32> } - affine.for %i2 = 0 to 10 { + for %i2 = 0 to 10 { %v2 = load %b[%i2] : memref<10xf32> store %v2, %c[%i2] : memref<10xf32> } // Loops '%i0' and '%i2' cannot fuse along producer/consumer edge on memref // '%b', because of the WAR dep from '%i0' to '%i1' on memref '%a' and // because of the WAR dep from '%i1' to '%i2' on memref '%c'. 
- // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: %3 = load %0[%i0] : memref<10xf32> // CHECK-NEXT: store %3, %1[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i1] : memref<10xf32> // CHECK-NEXT: %4 = load %2[%i1] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i2 = 0 to 10 { + // CHECK-NEXT: for %i2 = 0 to 10 { // CHECK-NEXT: %5 = load %1[%i2] : memref<10xf32> // CHECK-NEXT: store %5, %2[%i2] : memref<10xf32> // CHECK-NEXT: } @@ -1432,17 +1432,17 @@ func @should_fuse_and_move_to_preserve_war_dep() { %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { %v0 = load %b[%i0] : memref<10xf32> store %v0, %a[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 3 { + for %i1 = 0 to 3 { %v2 = load %c[%i1] : memref<10xf32> } - affine.for %i2 = 0 to 5 { + for %i2 = 0 to 5 { store %cf7, %b[%i2] : memref<10xf32> } - affine.for %i3 = 0 to 10 { + for %i3 = 0 to 10 { %v1 = load %a[%i3] : memref<10xf32> store %cf7, %c[%i3] : memref<10xf32> } @@ -1461,10 +1461,10 @@ func @should_fuse_and_move_to_preserve_war_dep() { // if the fused loop nest is inserted between loops '%i1' and '%i2'. // CHECK-DAG: %0 = alloc() : memref<1xf32> - // CHECK: affine.for %i0 = 0 to 3 { + // CHECK: for %i0 = 0 to 3 { // CHECK-NEXT: %3 = load %2[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: %4 = load %1[%i1] : memref<10xf32> // CHECK-NEXT: %5 = affine.apply [[MAP0]](%i1, %i1) // CHECK-NEXT: store %4, %0[%5] : memref<1xf32> @@ -1472,7 +1472,7 @@ func @should_fuse_and_move_to_preserve_war_dep() { // CHECK-NEXT: %7 = load %0[%6] : memref<1xf32> // CHECK-NEXT: store %cst, %2[%i1] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i2 = 0 to 5 { + // CHECK-NEXT: for %i2 = 0 to 5 { // CHECK-NEXT: store %cst, %1[%i2] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -1489,30 +1489,30 @@ func @fusion_preventing_dep_on_constant() { %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { %v0 = load %b[%i0] : memref<10xf32> store %cf7, %a[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { store %cf7, %b[%i1] : memref<10xf32> } %cf11 = constant 11.0 : f32 - affine.for %i2 = 0 to 10 { + for %i2 = 0 to 10 { %v2 = load %a[%i2] : memref<10xf32> store %cf11, %c[%i2] : memref<10xf32> } // Loops '%i0' and '%i2' cannot fuse along producer/consumer edge on memref // '%a', because of the WAR dep from '%i0' to '%i1' on memref '%b' and // because of the SSA value dep from '%cf11' def to use in '%i2'. 
- // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: %3 = load %1[%i0] : memref<10xf32> // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %1[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: %cst_0 = constant 1.100000e+01 : f32 - // CHECK-NEXT: affine.for %i2 = 0 to 10 { + // CHECK-NEXT: for %i2 = 0 to 10 { // CHECK-NEXT: %4 = load %0[%i2] : memref<10xf32> // CHECK-NEXT: store %cst_0, %2[%i2] : memref<10xf32> // CHECK-NEXT: } @@ -1532,14 +1532,14 @@ func @should_fuse_and_preserve_dep_on_constant() { %cf7 = constant 7.0 : f32 %cf11 = constant 11.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { %v0 = load %b[%i0] : memref<10xf32> store %cf7, %a[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { store %cf7, %b[%i1] : memref<10xf32> } - affine.for %i2 = 0 to 10 { + for %i2 = 0 to 10 { %v2 = load %a[%i2] : memref<10xf32> store %cf11, %c[%i2] : memref<10xf32> } @@ -1549,7 +1549,7 @@ func @should_fuse_and_preserve_dep_on_constant() { // the SSA value dep from '%cf11' def to use in '%i2'. // CHECK: %cst_0 = constant 1.100000e+01 : f32 - // CHECK-NEXT: affine.for %i0 = 0 to 10 { + // CHECK-NEXT: for %i0 = 0 to 10 { // CHECK-NEXT: %3 = load %1[%i0] : memref<10xf32> // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %0[%4] : memref<1xf32> @@ -1557,7 +1557,7 @@ func @should_fuse_and_preserve_dep_on_constant() { // CHECK-NEXT: %6 = load %0[%5] : memref<1xf32> // CHECK-NEXT: store %cst_0, %2[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %1[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -1575,25 +1575,25 @@ func @should_fuse_and_preserve_dep_on_constant() { func @should_fuse_at_depth_above_loop_carried_dependence(%arg0: memref<64x4xf32>, %arg1: memref<64x4xf32>) { %out = alloc() : memref<64x4xf32> %0 = constant 0.0 : f32 - affine.for %i0 = 0 to 64 { - affine.for %i1 = 0 to 4 { + for %i0 = 0 to 64 { + for %i1 = 0 to 4 { store %0, %out[%i0, %i1] : memref<64x4xf32> } } - affine.for %i2 = 0 to 4 { - affine.for %i3 = 0 to 4 { - affine.for %i4 = 0 to 16 { + for %i2 = 0 to 4 { + for %i3 = 0 to 4 { + for %i4 = 0 to 16 { %1 = affine.apply (d0, d1) -> (d0 * 16 - d1 + 15)(%i3, %i4) %2 = load %arg1[%1, %i2] : memref<64x4xf32> "op0"(%2) : (f32) -> () } - affine.for %i5 = 0 to 4 { - affine.for %i6 = 0 to 16 { + for %i5 = 0 to 4 { + for %i6 = 0 to 16 { %3 = affine.apply (d0, d1) -> (d0 * 16 - d1 + 15)(%i5, %i6) %4 = load %arg0[%3, %i3] : memref<64x4xf32> "op1"(%4) : (f32) -> () } - affine.for %i7 = 0 to 16 { + for %i7 = 0 to 16 { %5 = "op2"() : () -> (f32) %6 = affine.apply (d0, d1) -> (d0 * 16 + d1)(%i5, %i7) %7 = load %out[%6, %i2] : memref<64x4xf32> @@ -1613,25 +1613,25 @@ func @should_fuse_at_depth_above_loop_carried_dependence(%arg0: memref<64x4xf32> // memref size can be reduced to 128x1xf32. 
// CHECK: %0 = alloc() : memref<64x1xf32> - // CHECK: affine.for %i0 = 0 to 4 { - // CHECK-NEXT: affine.for %i1 = 0 to 64 { + // CHECK: for %i0 = 0 to 4 { + // CHECK-NEXT: for %i1 = 0 to 64 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1, %i0) // CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1, %i0) // CHECK-NEXT: store %cst, %0[%1, %2] : memref<64x1xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i2 = 0 to 4 { - // CHECK-NEXT: affine.for %i3 = 0 to 16 { + // CHECK-NEXT: for %i2 = 0 to 4 { + // CHECK-NEXT: for %i3 = 0 to 16 { // CHECK-NEXT: %3 = affine.apply [[MAP2]](%i2, %i3) // CHECK-NEXT: %4 = load %arg1[%3, %i0] : memref<64x4xf32> // CHECK-NEXT: "op0"(%4) : (f32) -> () // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i4 = 0 to 4 { - // CHECK-NEXT: affine.for %i5 = 0 to 16 { + // CHECK-NEXT: for %i4 = 0 to 4 { + // CHECK-NEXT: for %i5 = 0 to 16 { // CHECK-NEXT: %5 = affine.apply [[MAP2]](%i4, %i5) // CHECK-NEXT: %6 = load %arg0[%5, %i2] : memref<64x4xf32> // CHECK-NEXT: "op1"(%6) : (f32) -> () // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i6 = 0 to 16 { + // CHECK-NEXT: for %i6 = 0 to 16 { // CHECK-NEXT: %7 = "op2"() : () -> f32 // CHECK-NEXT: %8 = affine.apply [[MAP3]](%i4, %i6) // CHECK-NEXT: %9 = affine.apply [[MAP0]](%i0, %8, %i0) @@ -1660,14 +1660,14 @@ func @should_fuse_after_private_memref_creation() { %cf7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %a[%i0] : memref<10xf32> } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %v0 = load %a[%i1] : memref<10xf32> store %v0, %b[%i1] : memref<10xf32> } - affine.for %i2 = 0 to 10 { + for %i2 = 0 to 10 { %v1 = load %a[%i2] : memref<10xf32> store %v1, %b[%i2] : memref<10xf32> } @@ -1678,14 +1678,14 @@ func @should_fuse_after_private_memref_creation() { // private memref, the dependence between '%i0' and '%i1' on memref '%a' no // longer exists, so '%i0' can now be fused into '%i2'. 
- // CHECK: affine.for %i0 = 0 to 10 { + // CHECK: for %i0 = 0 to 10 { // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %1[%3] : memref<1xf32> // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %5 = load %1[%4] : memref<1xf32> // CHECK-NEXT: store %5, %2[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: affine.for %i1 = 0 to 10 { + // CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: %6 = affine.apply [[MAP0]](%i1, %i1) // CHECK-NEXT: store %cst, %0[%6] : memref<1xf32> // CHECK-NEXT: %7 = affine.apply [[MAP0]](%i1, %i1) diff --git a/mlir/test/Transforms/loop-tiling.mlir b/mlir/test/Transforms/loop-tiling.mlir index a1f9d717fab..c2fdbd4f80f 100644 --- a/mlir/test/Transforms/loop-tiling.mlir +++ b/mlir/test/Transforms/loop-tiling.mlir @@ -8,12 +8,12 @@ // CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, s0, 4096 floordiv s1) // CHECK-LABEL: func @loop_tiling() -// CHECK-NEXT: affine.for %i0 = 0 to 256 step 32 { -// CHECK-NEXT: affine.for %i1 = 0 to 512 step 32 { -// CHECK-NEXT: affine.for %i2 = 0 to 1024 step 32 { -// CHECK-NEXT: affine.for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) { -// CHECK-NEXT: affine.for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) { -// CHECK-NEXT: affine.for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) { +// CHECK-NEXT: for %i0 = 0 to 256 step 32 { +// CHECK-NEXT: for %i1 = 0 to 512 step 32 { +// CHECK-NEXT: for %i2 = 0 to 1024 step 32 { +// CHECK-NEXT: for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) { +// CHECK-NEXT: for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) { +// CHECK-NEXT: for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) { // CHECK-NEXT: "foo"(%i3, %i4, %i5) : (index, index, index) -> () // CHECK-NEXT: } // CHECK-NEXT: } @@ -21,32 +21,32 @@ // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: affine.for %i6 = 0 to 50 step 32 { -// CHECK-NEXT: affine.for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) { +// CHECK-NEXT: for %i6 = 0 to 50 step 32 { +// CHECK-NEXT: for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) { // CHECK-NEXT: "bar"(%i7, %i7) : (index, index) -> () // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: affine.for %i8 = 0 to 21 step 32 { -// CHECK-NEXT: affine.for %i9 = [[IDENTITY]](%i8) to 21 { +// CHECK-NEXT: for %i8 = 0 to 21 step 32 { +// CHECK-NEXT: for %i9 = [[IDENTITY]](%i8) to 21 { // CHECK-NEXT: "foobar"(%i9) : (index) -> () // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: return func @loop_tiling() { - affine.for %i = 0 to 256 { - affine.for %j = 0 to 512 { - affine.for %k = 0 to 1024 { + for %i = 0 to 256 { + for %j = 0 to 512 { + for %k = 0 to 1024 { "foo"(%i, %j, %k) : (index, index, index) -> () } } } - affine.for %x = 0 to 50 { + for %x = 0 to 50 { "bar"(%x, %x) : (index, index) -> () } // Intra-tile loop won't need a min expression. - affine.for %y = 0 to 21 { + for %y = 0 to 21 { "foobar"(%y) : (index) -> () } @@ -58,12 +58,12 @@ func @loop_tiling() { // CHECK-LABEL: func @loop_max_min_bound(%arg0: memref<?xi32>, %arg1: index, %arg2: index) { func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) { %M = dim %A, 0 : memref<? 
x i32> - affine.for %iTT = max #lb()[%L] to min #ub()[%M, %U] { + for %iTT = max #lb()[%L] to min #ub()[%M, %U] { %out = affine.apply (d0) -> (d0) (%iTT) } return -// CHECK: affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 { -// CHECK-NEXT: affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) { +// CHECK: for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 { +// CHECK-NEXT: for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) { // CHECK-NEXT: %1 = affine.apply [[IDENTITY]](%i1) // CHECK-NEXT: } // CHECK-NEXT: } diff --git a/mlir/test/Transforms/lower-affine.mlir b/mlir/test/Transforms/lower-affine.mlir index 5882da5c749..22e9f4b9fd4 100644 --- a/mlir/test/Transforms/lower-affine.mlir +++ b/mlir/test/Transforms/lower-affine.mlir @@ -24,7 +24,7 @@ func @body(index) -> () // CHECK-NEXT: return // CHECK-NEXT: } func @simple_loop() { - affine.for %i = 1 to 42 { + for %i = 1 to 42 { call @body(%i) : (index) -> () } return @@ -65,9 +65,9 @@ func @post(index) -> () // CHECK-NEXT: return // CHECK-NEXT: } func @imperfectly_nested_loops() { - affine.for %i = 0 to 42 { + for %i = 0 to 42 { call @pre(%i) : (index) -> () - affine.for %j = 7 to 56 step 2 { + for %j = 7 to 56 step 2 { call @body2(%i, %j) : (index, index) -> () } call @post(%i) : (index) -> () @@ -122,13 +122,13 @@ func @body3(index, index) -> () // CHECK-NEXT: return // CHECK-NEXT: } func @more_imperfectly_nested_loops() { - affine.for %i = 0 to 42 { + for %i = 0 to 42 { call @pre(%i) : (index) -> () - affine.for %j = 7 to 56 step 2 { + for %j = 7 to 56 step 2 { call @body2(%i, %j) : (index, index) -> () } call @mid(%i) : (index) -> () - affine.for %k = 18 to 37 step 3 { + for %k = 18 to 37 step 3 { call @body3(%i, %k) : (index, index) -> () } call @post(%i) : (index) -> () @@ -161,8 +161,8 @@ func @more_imperfectly_nested_loops() { // CHECK-NEXT: return // CHECK-NEXT: } func @affine_apply_loops_shorthand(%N : index) { - affine.for %i = 0 to %N { - affine.for %j = %i to 42 { + for %i = 0 to %N { + for %j = %i to 42 { call @body2(%i, %j) : (index, index) -> () } } @@ -360,7 +360,7 @@ func @if_for() { // CHECK-NEXT: [[outerEndBB]]: // CHECK-NEXT: br [[outerLoopInit:\^bb[0-9]+]] affine.if #set1(%i) { - affine.for %j = 0 to 42 { + for %j = 0 to 42 { affine.if #set2(%j) { call @body2(%i, %j) : (index, index) -> () } @@ -397,9 +397,9 @@ func @if_for() { // CHECK-NEXT: %c1_9 = constant 1 : index // CHECK-NEXT: %16 = addi %9, %c1_9 : index // CHECK-NEXT: br [[outerLoopCond]](%16 : index) - affine.for %k = 0 to 42 { + for %k = 0 to 42 { affine.if #set2(%k) { - affine.for %l = 0 to 42 { + for %l = 0 to 42 { call @body3(%k, %l) : (index, index) -> () } } @@ -446,8 +446,8 @@ func @if_for() { // CHECK-NEXT: return // CHECK-NEXT: } func @loop_min_max(%N : index) { - affine.for %i = 0 to 42 { - affine.for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] { + for %i = 0 to 42 { + for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] { call @body2(%i, %j) : (index, index) -> () } } @@ -486,7 +486,7 @@ func @loop_min_max(%N : index) { // CHECK-NEXT: return // CHECK-NEXT: } func @min_reduction_tree(%v : index) { - affine.for %i = 0 to min #map_7_values(%v)[] { + for %i = 0 to min #map_7_values(%v)[] { call @body(%i) : (index) -> () } return diff --git a/mlir/test/Transforms/memref-bound-check.mlir b/mlir/test/Transforms/memref-bound-check.mlir index b3d5b23e70f..2926bf1afbc 100644 --- a/mlir/test/Transforms/memref-bound-check.mlir +++ 
b/mlir/test/Transforms/memref-bound-check.mlir @@ -11,8 +11,8 @@ func @test() { %A = alloc() : memref<9 x 9 x i32> %B = alloc() : memref<111 x i32> - affine.for %i = -1 to 10 { - affine.for %j = -1 to 10 { + for %i = -1 to 10 { + for %j = -1 to 10 { %idx0 = affine.apply (d0, d1) -> (d0)(%i, %j) %idx1 = affine.apply (d0, d1) -> (d1)(%i, %j) // Out of bound access. @@ -27,7 +27,7 @@ func @test() { } } - affine.for %k = 0 to 10 { + for %k = 0 to 10 { // In bound. %u = load %B[%zero] : memref<111 x i32> // Out of bounds. @@ -43,8 +43,8 @@ func @test_mod_floordiv_ceildiv() { %zero = constant 0 : index %A = alloc() : memref<128 x 64 x 64 x i32> - affine.for %i = 0 to 256 { - affine.for %j = 0 to 256 { + for %i = 0 to 256 { + for %j = 0 to 256 { %idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j) %idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j) %idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j) @@ -69,8 +69,8 @@ func @test_no_out_of_bounds() { %C = alloc() : memref<257 x i32> %B = alloc() : memref<1 x i32> - affine.for %i = 0 to 256 { - affine.for %j = 0 to 256 { + for %i = 0 to 256 { + for %j = 0 to 256 { // All of these accesses are in bound; check that no errors are emitted. // CHECK: %3 = affine.apply {{#map.*}}(%i0, %i1) // CHECK-NEXT: %4 = load %0[%3, %c0] : memref<257x256xi32> @@ -93,8 +93,8 @@ func @mod_div() { %zero = constant 0 : index %A = alloc() : memref<128 x 64 x 64 x i32> - affine.for %i = 0 to 256 { - affine.for %j = 0 to 256 { + for %i = 0 to 256 { + for %j = 0 to 256 { %idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j) %idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j) %idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j) @@ -115,8 +115,8 @@ func @mod_div() { // CHECK-LABEL: func @mod_floordiv_nested() { func @mod_floordiv_nested() { %A = alloc() : memref<256 x 256 x i32> - affine.for %i = 0 to 256 { - affine.for %j = 0 to 256 { + for %i = 0 to 256 { + for %j = 0 to 256 { %idx0 = affine.apply (d0, d1) -> ((d0 mod 1024) floordiv 4)(%i, %j) %idx1 = affine.apply (d0, d1) -> ((((d1 mod 128) mod 32) ceildiv 4) * 32)(%i, %j) load %A[%idx0, %idx1] : memref<256 x 256 x i32> // expected-error {{'load' op memref out of upper bound access along dimension #2}} @@ -128,7 +128,7 @@ func @mod_floordiv_nested() { // CHECK-LABEL: func @test_semi_affine_bailout func @test_semi_affine_bailout(%N : index) { %B = alloc() : memref<10 x i32> - affine.for %i = 0 to 10 { + for %i = 0 to 10 { %idx = affine.apply (d0)[s0] -> (d0 * s0)(%i)[%N] %y = load %B[%idx] : memref<10 x i32> } @@ -138,7 +138,7 @@ func @test_semi_affine_bailout(%N : index) { // CHECK-LABEL: func @multi_mod_floordiv func @multi_mod_floordiv() { %A = alloc() : memref<2x2xi32> - affine.for %ii = 0 to 64 { + for %ii = 0 to 64 { %idx0 = affine.apply (d0) -> ((d0 mod 147456) floordiv 1152) (%ii) %idx1 = affine.apply (d0) -> (((d0 mod 147456) mod 1152) floordiv 384) (%ii) %v = load %A[%idx0, %idx1] : memref<2x2xi32> @@ -153,8 +153,8 @@ func @delinearize_mod_floordiv() { %out = alloc() : memref<64x9xi32> // Reshape '%in' into '%out'. 
- affine.for %ii = 0 to 64 { - affine.for %jj = 0 to 9 { + for %ii = 0 to 64 { + for %jj = 0 to 9 { %a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj) %a10 = affine.apply (d0) -> (d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0) @@ -189,7 +189,7 @@ func @out_of_bounds() { %in = alloc() : memref<1xi32> %c9 = constant 9 : i32 - affine.for %i0 = 10 to 11 { + for %i0 = 10 to 11 { %idy = affine.apply (d0) -> (100 * d0 floordiv 1000) (%i0) store %c9, %in[%idy] : memref<1xi32> // expected-error {{'store' op memref out of upper bound access along dimension #1}} } diff --git a/mlir/test/Transforms/memref-dataflow-opt.mlir b/mlir/test/Transforms/memref-dataflow-opt.mlir index ed39d71eefd..710d14c1cf9 100644 --- a/mlir/test/Transforms/memref-dataflow-opt.mlir +++ b/mlir/test/Transforms/memref-dataflow-opt.mlir @@ -10,14 +10,14 @@ func @simple_store_load() { %cf7 = constant 7.0 : f32 %m = alloc() : memref<10xf32> - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 } return // CHECK: %cst = constant 7.000000e+00 : f32 -// CHECK-NEXT: affine.for %i0 = 0 to 10 { +// CHECK-NEXT: for %i0 = 0 to 10 { // CHECK-NEXT: %0 = addf %cst, %cst : f32 // CHECK-NEXT: } // CHECK-NEXT: return @@ -30,7 +30,7 @@ func @multi_store_load() { %cf8 = constant 8.0 : f32 %cf9 = constant 9.0 : f32 %m = alloc() : memref<10xf32> - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -45,7 +45,7 @@ func @multi_store_load() { // CHECK-NEXT: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32 // CHECK-NEXT: %cst_1 = constant 9.000000e+00 : f32 -// CHECK-NEXT: affine.for %i0 = 0 to 10 { +// CHECK-NEXT: for %i0 = 0 to 10 { // CHECK-NEXT: %0 = addf %cst, %cst : f32 // CHECK-NEXT: %1 = mulf %cst_1, %cst_1 : f32 // CHECK-NEXT: } @@ -59,8 +59,8 @@ func @multi_store_load() { func @store_load_affine_apply() -> memref<10x10xf32> { %cf7 = constant 7.0 : f32 %m = alloc() : memref<10x10xf32> - affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to 10 { + for %i0 = 0 to 10 { + for %i1 = 0 to 10 { %t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1) %t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1) %idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1) @@ -75,8 +75,8 @@ func @store_load_affine_apply() -> memref<10x10xf32> { return %m : memref<10x10xf32> // CHECK: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %0 = alloc() : memref<10x10xf32> -// CHECK-NEXT: affine.for %i0 = 0 to 10 { -// CHECK-NEXT: affine.for %i1 = 0 to 10 { +// CHECK-NEXT: for %i0 = 0 to 10 { +// CHECK-NEXT: for %i1 = 0 to 10 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1) // CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1) // CHECK-NEXT: %3 = affine.apply [[MAP2]](%1, %2) @@ -92,17 +92,17 @@ func @store_load_affine_apply() -> memref<10x10xf32> { func @store_load_nested(%N : index) { %cf7 = constant 7.0 : f32 %m = alloc() : memref<10xf32> - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - affine.for %i1 = 0 to %N { + for %i1 = 0 to %N { %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 } } return // CHECK: %cst = constant 7.000000e+00 : f32 -// CHECK-NEXT: affine.for %i0 = 0 to 10 { -// CHECK-NEXT: affine.for %i1 = 0 to %arg0 { +// CHECK-NEXT: for %i0 = 0 to 10 { +// CHECK-NEXT: for %i1 = 0 to %arg0 { // CHECK-NEXT: %0 = addf %cst, %cst : f32 // CHECK-NEXT: } // CHECK-NEXT: } @@ -117,12 +117,12 @@ 
func @multi_store_load_nested_no_fwd(%N : index) { %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 %m = alloc() : memref<10xf32> - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - affine.for %i1 = 0 to %N { + for %i1 = 0 to %N { store %cf8, %m[%i1] : memref<10xf32> } - affine.for %i2 = 0 to %N { + for %i2 = 0 to %N { // CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32> %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -138,9 +138,9 @@ func @store_load_store_nested_no_fwd(%N : index) { %cf7 = constant 7.0 : f32 %cf9 = constant 9.0 : f32 %m = alloc() : memref<10xf32> - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - affine.for %i1 = 0 to %N { + for %i1 = 0 to %N { // CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32> %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -159,16 +159,16 @@ func @multi_store_load_nested_fwd(%N : index) { %cf9 = constant 9.0 : f32 %cf10 = constant 10.0 : f32 %m = alloc() : memref<10xf32> - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - affine.for %i1 = 0 to %N { + for %i1 = 0 to %N { store %cf8, %m[%i1] : memref<10xf32> } - affine.for %i2 = 0 to %N { + for %i2 = 0 to %N { store %cf9, %m[%i2] : memref<10xf32> } store %cf10, %m[%i0] : memref<10xf32> - affine.for %i3 = 0 to %N { + for %i3 = 0 to %N { // CHECK-NOT: %{{[0-9]+}} = load %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -182,10 +182,10 @@ func @multi_store_load_nested_fwd(%N : index) { func @store_load_no_fwd() { %cf7 = constant 7.0 : f32 %m = alloc() : memref<10xf32> - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - affine.for %i1 = 0 to 10 { - affine.for %i2 = 0 to 10 { + for %i1 = 0 to 10 { + for %i2 = 0 to 10 { // CHECK: load %{{[0-9]+}} %v0 = load %m[%i2] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -202,9 +202,9 @@ func @store_load_fwd() { %c0 = constant 0 : index %m = alloc() : memref<10xf32> store %cf7, %m[%c0] : memref<10xf32> - affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to 10 { - affine.for %i2 = 0 to 10 { + for %i0 = 0 to 10 { + for %i1 = 0 to 10 { + for %i2 = 0 to 10 { // CHECK-NOT: load %{{[0-9]}}+ %v0 = load %m[%c0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -223,9 +223,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 { %c0 = constant 0 : index %c1 = constant 1 : index %m = alloc() : memref<10xf32> - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - affine.for %i1 = 0 to %N { + for %i1 = 0 to %N { %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 %idx = affine.apply (d0) -> (d0 + 1) (%i0) @@ -236,9 +236,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 { %v3 = load %m[%c1] : memref<10xf32> return %v3 : f32 // CHECK: %0 = alloc() : memref<10xf32> -// CHECK-NEXT: affine.for %i0 = 0 to 10 { +// CHECK-NEXT: for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> -// CHECK-NEXT: affine.for %i1 = 0 to %arg0 { +// CHECK-NEXT: for %i1 = 0 to %arg0 { // CHECK-NEXT: %1 = addf %cst, %cst : f32 // CHECK-NEXT: %2 = affine.apply [[MAP4]](%i0) // CHECK-NEXT: store %cst_0, %0[%2] : memref<10xf32> diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir index 00d0e730098..0accc30630b 100644 --- a/mlir/test/Transforms/memref-dependence-check.mlir +++ b/mlir/test/Transforms/memref-dependence-check.mlir @@ -13,14 +13,14 @@ func 
@store_may_execute_before_load() { // ancestor IfOp of the store, dominates the ancestor ForSmt of the load, // and thus the store "may" conditionally execute before the load. affine.if #set0(%c0) { - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} // expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}} // expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}} } } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> // expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}} // expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}} @@ -37,13 +37,13 @@ func @dependent_loops() { %cst = constant 7.000000e+00 : f32 // There is a dependence from 0 to 1 at depth 1 (common surrounding loops 0) // because the first loop with the store dominates the second loop. - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { store %cst, %0[%i0] : memref<10xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} // expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}} // expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}} } - affine.for %i1 = 0 to 10 { + for %i1 = 0 to 10 { %1 = load %0[%i1] : memref<10xf32> // expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}} // expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}} @@ -231,7 +231,7 @@ func @store_range_load_after_range() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c10 = constant 10 : index - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { %a0 = affine.apply (d0) -> (d0) (%i0) store %c7, %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} @@ -254,7 +254,7 @@ func @store_load_func_symbol(%arg0: index, %arg1: index) { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c10 = constant 10 : index - affine.for %i0 = 0 to %arg1 { + for %i0 = 0 to %arg1 { %a0 = affine.apply (d0) -> (d0) (%arg0) store %c7, %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = [1, +inf]}} @@ -277,7 +277,7 @@ func @store_range_load_last_in_range() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c10 = constant 10 : index - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { %a0 = affine.apply (d0) -> (d0) (%i0) // For dependence from 0 to 1, we do not have a loop carried dependence // because only the final write in the loop accesses the same element as the @@ -305,7 +305,7 @@ func @store_range_load_before_range() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c0 = constant 0 : index - affine.for %i0 = 1 to 11 { + for %i0 = 1 to 11 { %a0 = affine.apply (d0) -> (d0) (%i0) store %c7, %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} @@ -328,7 +328,7 @@ func @store_range_load_first_in_range() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c0 = constant 0 : index - affine.for %i0 = 1 to 11 { + for %i0 = 1 to 11 { %a0 = affine.apply (d0) -> (d0) (%i0) // Dependence from 0 to 1 at depth 1 is a range because all loads at // constant index zero are reads after first store at index zero during @@ -353,7 +353,7 @@ func @store_range_load_first_in_range() { func @store_plus_3() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 - affine.for %i0 = 1 to 11 { + for %i0 = 1 to 11 { %a0 = affine.apply (d0) -> (d0 + 3) (%i0) store %c7, %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = 
false}} @@ -375,7 +375,7 @@ func @store_plus_3() { func @load_minus_2() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 - affine.for %i0 = 2 to 11 { + for %i0 = 2 to 11 { %a0 = affine.apply (d0) -> (d0) (%i0) store %c7, %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} @@ -397,8 +397,8 @@ func @load_minus_2() { func @perfectly_nested_loops_loop_independent() { %m = alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 - affine.for %i0 = 0 to 11 { - affine.for %i1 = 0 to 11 { + for %i0 = 0 to 11 { + for %i1 = 0 to 11 { // Dependence from access 0 to 1 is loop independent at depth = 3. %a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1) %a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1) @@ -428,8 +428,8 @@ func @perfectly_nested_loops_loop_independent() { func @perfectly_nested_loops_loop_carried_at_depth1() { %m = alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 - affine.for %i0 = 0 to 9 { - affine.for %i1 = 0 to 9 { + for %i0 = 0 to 9 { + for %i1 = 0 to 9 { // Dependence from access 0 to 1 is loop carried at depth 1. %a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1) %a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1) @@ -459,8 +459,8 @@ func @perfectly_nested_loops_loop_carried_at_depth1() { func @perfectly_nested_loops_loop_carried_at_depth2() { %m = alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to 10 { + for %i0 = 0 to 10 { + for %i1 = 0 to 10 { // Dependence from access 0 to 1 is loop carried at depth 2. %a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1) %a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1) @@ -491,8 +491,8 @@ func @one_common_loop() { %m = alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 // There is a loop-independent dependence from access 0 to 1 at depth 2. - affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to 10 { + for %i0 = 0 to 10 { + for %i1 = 0 to 10 { %a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1) %a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1) store %c7, %m[%a00, %a01] : memref<10x10xf32> @@ -502,7 +502,7 @@ func @one_common_loop() { // expected-note@-4 {{dependence from 0 to 1 at depth 1 = false}} // expected-note@-5 {{dependence from 0 to 1 at depth 2 = true}} } - affine.for %i2 = 0 to 9 { + for %i2 = 0 to 9 { %a10 = affine.apply (d0, d1) -> (d0) (%i0, %i2) %a11 = affine.apply (d0, d1) -> (d1) (%i0, %i2) %v0 = load %m[%a10, %a11] : memref<10x10xf32> @@ -525,7 +525,7 @@ func @dependence_cycle() { // Dependences: // *) loop-independent dependence from access 1 to 2 at depth 2. // *) loop-carried dependence from access 3 to 0 at depth 1. 
- affine.for %i0 = 0 to 9 { + for %i0 = 0 to 9 { %a0 = affine.apply (d0) -> (d0) (%i0) %v0 = load %m.a[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} @@ -575,8 +575,8 @@ func @dependence_cycle() { func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) { %m = alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 - affine.for %i0 = 0 to %arg0 { - affine.for %i1 = 0 to %arg1 { + for %i0 = 0 to %arg0 { + for %i1 = 0 to %arg1 { %a00 = affine.apply (d0, d1) -> (d0 - 1) (%i0, %i1) %a01 = affine.apply (d0, d1) -> (d1 + 1) (%i0, %i1) %v0 = load %m[%a00, %a01] : memref<10x10xf32> @@ -605,8 +605,8 @@ func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) { func @war_raw_waw_deps() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to 10 { + for %i0 = 0 to 10 { + for %i1 = 0 to 10 { %a0 = affine.apply (d0) -> (d0 + 1) (%i1) %v0 = load %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} @@ -633,7 +633,7 @@ func @war_raw_waw_deps() { func @mod_deps() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 - affine.for %i0 = 0 to 10 { + for %i0 = 0 to 10 { %a0 = affine.apply (d0) -> (d0 mod 2) (%i0) // Results are conservative here since we currently don't have a way to // represent strided sets in FlatAffineConstraints. @@ -658,8 +658,8 @@ func @loop_nest_depth() { %0 = alloc() : memref<100x100xf32> %c7 = constant 7.0 : f32 - affine.for %i0 = 0 to 128 { - affine.for %i1 = 0 to 8 { + for %i0 = 0 to 128 { + for %i1 = 0 to 8 { store %c7, %0[%i0, %i1] : memref<100x100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} // expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}} @@ -667,10 +667,10 @@ func @loop_nest_depth() { // expected-note@-4 {{dependence from 0 to 1 at depth 1 = true}} } } - affine.for %i2 = 0 to 8 { - affine.for %i3 = 0 to 8 { - affine.for %i4 = 0 to 8 { - affine.for %i5 = 0 to 16 { + for %i2 = 0 to 8 { + for %i3 = 0 to 8 { + for %i4 = 0 to 8 { + for %i5 = 0 to 16 { %8 = affine.apply (d0, d1) -> (d0 * 16 + d1)(%i4, %i5) %9 = load %0[%8, %i3] : memref<100x100xf32> // expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}} @@ -693,9 +693,9 @@ func @loop_nest_depth() { func @mod_div_3d() { %M = alloc() : memref<2x2x2xi32> %c0 = constant 0 : i32 - affine.for %i0 = 0 to 8 { - affine.for %i1 = 0 to 8 { - affine.for %i2 = 0 to 8 { + for %i0 = 0 to 8 { + for %i1 = 0 to 8 { + for %i2 = 0 to 8 { %idx0 = affine.apply (d0, d1, d2) -> (d0 floordiv 4) (%i0, %i1, %i2) %idx1 = affine.apply (d0, d1, d2) -> (d1 mod 2) (%i0, %i1, %i2) %idx2 = affine.apply (d0, d1, d2) -> (d2 floordiv 4) (%i0, %i1, %i2) @@ -719,12 +719,12 @@ func @delinearize_mod_floordiv() { %in = alloc() : memref<2x2x3x3x16x1xi32> %out = alloc() : memref<64x9xi32> - affine.for %i0 = 0 to 2 { - affine.for %i1 = 0 to 2 { - affine.for %i2 = 0 to 3 { - affine.for %i3 = 0 to 3 { - affine.for %i4 = 0 to 16 { - affine.for %i5 = 0 to 1 { + for %i0 = 0 to 2 { + for %i1 = 0 to 2 { + for %i2 = 0 to 3 { + for %i3 = 0 to 3 { + for %i4 = 0 to 16 { + for %i5 = 0 to 1 { store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} // expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}} @@ -742,8 +742,8 @@ func @delinearize_mod_floordiv() { } } - affine.for %ii = 0 to 64 { - affine.for %jj = 0 to 9 { + for %ii = 0 to 64 { + for %jj = 0 to 9 { %a0 = affine.apply 
(d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj) %a10 = affine.apply (d0) -> (d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0) diff --git a/mlir/test/Transforms/pipeline-data-transfer.mlir b/mlir/test/Transforms/pipeline-data-transfer.mlir index ede5c63fbac..30f98db2583 100644 --- a/mlir/test/Transforms/pipeline-data-transfer.mlir +++ b/mlir/test/Transforms/pipeline-data-transfer.mlir @@ -16,13 +16,13 @@ func @loop_nest_dma() { %zero = constant 0 : index %num_elts = constant 128 : index - affine.for %i = 0 to 8 { + for %i = 0 to 8 { dma_start %A[%i], %Ah[%i], %num_elts, %tag[%zero] : memref<256 x f32>, memref<32 x f32, 1>, memref<1 x f32> dma_wait %tag[%zero], %num_elts : memref<1 x f32> %v = load %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1> %r = "compute"(%v) : (f32) -> (f32) store %r, %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1> - affine.for %j = 0 to 128 { + for %j = 0 to 128 { "do_more_compute"(%i, %j) : (index, index) -> () } } @@ -34,7 +34,7 @@ func @loop_nest_dma() { // CHECK-NEXT: %3 = affine.apply [[MOD_2]](%c0) // CHECK-NEXT: %4 = affine.apply [[MOD_2]](%c0) // CHECK-NEXT: dma_start %0[%c0], %1[%3, %c0], %c128, %2[%4, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32> -// CHECK-NEXT: affine.for %i0 = 1 to 8 { +// CHECK-NEXT: for %i0 = 1 to 8 { // CHECK-NEXT: %5 = affine.apply [[MOD_2]](%i0) // CHECK-NEXT: %6 = affine.apply [[MOD_2]](%i0) // CHECK-NEXT: dma_start %0[%i0], %1[%5, %i0], %c128, %2[%6, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32> @@ -45,7 +45,7 @@ func @loop_nest_dma() { // CHECK-NEXT: %10 = load %1[%9, %7] : memref<2x32xf32, 1> // CHECK-NEXT: %11 = "compute"(%10) : (f32) -> f32 // CHECK-NEXT: store %11, %1[%9, %7] : memref<2x32xf32, 1> -// CHECK-NEXT: affine.for %i1 = 0 to 128 { +// CHECK-NEXT: for %i1 = 0 to 128 { // CHECK-NEXT: "do_more_compute"(%7, %i1) : (index, index) -> () // CHECK-NEXT: } // CHECK-NEXT: } @@ -56,7 +56,7 @@ func @loop_nest_dma() { // CHECK-NEXT: %15 = load %1[%14, %12] : memref<2x32xf32, 1> // CHECK-NEXT: %16 = "compute"(%15) : (f32) -> f32 // CHECK-NEXT: store %16, %1[%14, %12] : memref<2x32xf32, 1> -// CHECK-NEXT: affine.for %i2 = 0 to 128 { +// CHECK-NEXT: for %i2 = 0 to 128 { // CHECK-NEXT: "do_more_compute"(%12, %i2) : (index, index) -> () // CHECK-NEXT: } // CHECK-NEXT: return @@ -68,7 +68,7 @@ func @loop_step(%arg0: memref<512xf32>, %arg1: memref<512xf32>) { %c0 = constant 0 : index %c4 = constant 4 : index - affine.for %i0 = 0 to 512 step 4 { + for %i0 = 0 to 512 step 4 { %1 = alloc() : memref<4xf32, 1> %2 = alloc() : memref<1xi32> dma_start %arg0[%i0], %1[%c0], %c4, %2[%c0] @@ -82,7 +82,7 @@ func @loop_step(%arg0: memref<512xf32>, // CHECK: %2 = affine.apply [[FLOOR_MOD_2]](%c0) // CHECK: %3 = affine.apply [[FLOOR_MOD_2]](%c0) // CHECK-NEXT: dma_start %arg0[%c0], %0[%2, %c0_0], %c4, [[TAG]][%3, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32> -// CHECK-NEXT: affine.for %i0 = 4 to 512 step 4 { +// CHECK-NEXT: for %i0 = 4 to 512 step 4 { // CHECK-NEXT: %4 = affine.apply [[FLOOR_MOD_2]](%i0) // CHECK-NEXT: %5 = affine.apply [[FLOOR_MOD_2]](%i0) // CHECK-NEXT: dma_start %arg0[%i0], %0[%4, %c0_0], %c4, [[TAG]][%5, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32> @@ -114,8 +114,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref< // Prologue for DMA overlap on arg2. 
// CHECK:[[TAG_ARG2:%[0-9]+]] = alloc() : memref<2x2xi32> // CHECK: dma_start %arg2[ - // CHECK: affine.for %i0 = 1 to 8 { - affine.for %i0 = 0 to 8 { + // CHECK: for %i0 = 1 to 8 { + for %i0 = 0 to 8 { %6 = affine.apply #map2(%i0) dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32> dma_wait %5[%c0], %num_elts : memref<2xi32> @@ -127,8 +127,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref< // CHECK: [[TAG_ARG1:%[0-9]+]] = alloc() : memref<2x2xi32> // CHECK: dma_start %arg0[ // CHECK: dma_start %arg1[ - // CHECK-NEXT affine.for %i1 = 1 to 8 { - affine.for %i1 = 0 to 8 { + // CHECK-NEXT for %i1 = 1 to 8 { + for %i1 = 0 to 8 { %7 = affine.apply #map1(%i0, %i1) %8 = affine.apply #map2(%i1) dma_start %arg0[%7, %c0], %0[%c0, %c0], %num_elts, %3[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32> @@ -140,8 +140,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref< // CHECK: dma_start %arg1[ // CHECK: dma_wait [[TAG_ARG0]] // CHECK: dma_wait [[TAG_ARG1]] - // CHECK-NEXT: affine.for %i2 = 0 to 4 { - affine.for %i2 = 0 to 4 { + // CHECK-NEXT: for %i2 = 0 to 4 { + for %i2 = 0 to 4 { "foo"() : () -> () } } @@ -155,16 +155,16 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref< // CHECK: [[TAG_ARG1_NESTED:%[0-9]+]] = alloc() : memref<2x2xi32> // CHECK: dma_start %arg0[ // CHECK: dma_start %arg1[ - // CHECK: affine.for %i4 = 1 to 8 { + // CHECK: for %i4 = 1 to 8 { // CHECK: dma_start %arg0[ // CHECK: dma_start %arg1[ // CHECK: dma_wait [[TAG_ARG0_NESTED]] // CHECK: dma_wait [[TAG_ARG1_NESTED]] - // CHECK: affine.for %i5 = 0 to 4 { + // CHECK: for %i5 = 0 to 4 { // CHECK: "foo"() : () -> () // CHECK: dma_wait [[TAG_ARG0_NESTED]] // CHECK: dma_wait [[TAG_ARG1_NESTED]] - // CHECK: affine.for %i6 = 0 to 4 { + // CHECK: for %i6 = 0 to 4 { } return // CHECK: } @@ -185,8 +185,8 @@ func @loop_dma_dependent(%arg2: memref<512x32xvector<8xf32>>) { // The two DMAs below are dependent (incoming and outgoing on the same // memref) in the same iteration; so no pipelining here. // CHECK-NOT: dma_start - // CHECK: affine.for %i0 = 0 to 8 { - affine.for %i0 = 0 to 8 { + // CHECK: for %i0 = 0 to 8 { + for %i0 = 0 to 8 { %6 = affine.apply #map2(%i0) dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>>, memref<64x4xvector<8xf32>, 2>, memref<2xi32> dma_wait %5[%c0], %num_elts : memref<2xi32> @@ -206,8 +206,8 @@ func @escaping_use(%arg0: memref<512 x 32 x f32>) { %tag = alloc() : memref<1 x i32> // CHECK-NOT: dma_start - // CHECK: affine.for %i0 = 0 to 16 { - affine.for %kTT = 0 to 16 { + // CHECK: for %i0 = 0 to 16 { + for %kTT = 0 to 16 { dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] : memref<512 x 32 x f32>, memref<32 x 32 x f32, 2>, memref<1 x i32> @@ -230,14 +230,14 @@ func @live_out_use(%arg0: memref<512 x 32 x f32>) -> f32 { %tag = alloc() : memref<1 x i32> // CHECK-NOT: dma_start - // CHECK: affine.for %i0 = 0 to 16 { - affine.for %kTT = 0 to 16 { + // CHECK: for %i0 = 0 to 16 { + for %kTT = 0 to 16 { dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] : memref<512 x 32 x f32>, memref<32 x 32 x f32, 2>, memref<1 x i32> dma_wait %tag[%zero], %num_elt : memref<1 x i32> } - // Use live out of 'affine.for' inst; no DMA pipelining will be done. 
+ // Use live out of 'for' inst; no DMA pipelining will be done. %v = load %Av[%zero, %zero] : memref<32 x 32 x f32, 2> return %v : f32 // CHECK: %{{[0-9]+}} = load %{{[0-9]+}}[%c0, %c0] : memref<32x32xf32, 2> @@ -261,14 +261,14 @@ func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>) { // CHECK: %5 = affine.apply [[MOD_2]](%c0) // CHECK: %6 = affine.apply [[MOD_2]](%c0) // CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%5, %c0_0, %c0_0], %c512, %4[%6, %c0_0] - affine.for %kTT = 0 to 16 { + for %kTT = 0 to 16 { dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] : memref<512 x 32 x f32>, memref<? x ? x f32, 2>, memref<1 x i32> dma_wait %tag[%zero], %num_elt : memref<1 x i32> } return -// CHECK-NEXT: affine.for %i0 = 1 to 16 { +// CHECK-NEXT: for %i0 = 1 to 16 { // CHECK: %7 = affine.apply [[MOD_2]](%i0) // CHECK: %8 = affine.apply [[MOD_2]](%i0) // CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%7, %c0_0, %c0_0], %c512, %4[%8, %c0_0] diff --git a/mlir/test/Transforms/simplify-affine-structures.mlir b/mlir/test/Transforms/simplify-affine-structures.mlir index feb3a99b70b..2459604f369 100644 --- a/mlir/test/Transforms/simplify-affine-structures.mlir +++ b/mlir/test/Transforms/simplify-affine-structures.mlir @@ -73,8 +73,8 @@ // CHECK-LABEL: func @test_gaussian_elimination_empty_set0() { func @test_gaussian_elimination_empty_set0() { - affine.for %i0 = 1 to 10 { - affine.for %i1 = 1 to 100 { + for %i0 = 1 to 10 { + for %i1 = 1 to 100 { // CHECK: [[SET_EMPTY_2D]](%i0, %i1) affine.if (d0, d1) : (2 == 0)(%i0, %i1) { } @@ -85,8 +85,8 @@ func @test_gaussian_elimination_empty_set0() { // CHECK-LABEL: func @test_gaussian_elimination_empty_set1() { func @test_gaussian_elimination_empty_set1() { - affine.for %i0 = 1 to 10 { - affine.for %i1 = 1 to 100 { + for %i0 = 1 to 10 { + for %i1 = 1 to 100 { // CHECK: [[SET_EMPTY_2D]](%i0, %i1) affine.if (d0, d1) : (1 >= 0, -1 >= 0) (%i0, %i1) { } @@ -97,8 +97,8 @@ func @test_gaussian_elimination_empty_set1() { // CHECK-LABEL: func @test_gaussian_elimination_non_empty_set2() { func @test_gaussian_elimination_non_empty_set2() { - affine.for %i0 = 1 to 10 { - affine.for %i1 = 1 to 100 { + for %i0 = 1 to 10 { + for %i1 = 1 to 100 { // CHECK: #set1(%i0, %i1) affine.if #set2(%i0, %i1) { } @@ -111,8 +111,8 @@ func @test_gaussian_elimination_non_empty_set2() { func @test_gaussian_elimination_empty_set3() { %c7 = constant 7 : index %c11 = constant 11 : index - affine.for %i0 = 1 to 10 { - affine.for %i1 = 1 to 100 { + for %i0 = 1 to 10 { + for %i1 = 1 to 100 { // CHECK: #set2(%i0, %i1)[%c7, %c11] affine.if #set3(%i0, %i1)[%c7, %c11] { } @@ -125,8 +125,8 @@ func @test_gaussian_elimination_empty_set3() { func @test_gaussian_elimination_non_empty_set4() { %c7 = constant 7 : index %c11 = constant 11 : index - affine.for %i0 = 1 to 10 { - affine.for %i1 = 1 to 100 { + for %i0 = 1 to 10 { + for %i1 = 1 to 100 { // CHECK: #set3(%i0, %i1)[%c7, %c11] affine.if #set4(%i0, %i1)[%c7, %c11] { } @@ -139,8 +139,8 @@ func @test_gaussian_elimination_non_empty_set4() { func @test_gaussian_elimination_empty_set5() { %c7 = constant 7 : index %c11 = constant 11 : index - affine.for %i0 = 1 to 10 { - affine.for %i1 = 1 to 100 { + for %i0 = 1 to 10 { + for %i1 = 1 to 100 { // CHECK: #set2(%i0, %i1)[%c7, %c11] affine.if #set5(%i0, %i1)[%c7, %c11] { } @@ -151,8 +151,8 @@ func @test_gaussian_elimination_empty_set5() { // CHECK-LABEL: func @test_fuzz_explosion func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) { - affine.for %i0 = 1 to 10 { - 
affine.for %i1 = 1 to 100 { + for %i0 = 1 to 10 { + for %i1 = 1 to 100 { affine.if #set_fuzz_virus(%i0, %i1, %arg0, %arg1, %arg2, %arg3) { } } @@ -163,8 +163,8 @@ func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i // CHECK-LABEL: func @test_empty_set(%arg0: index) { func @test_empty_set(%N : index) { - affine.for %i = 0 to 10 { - affine.for %j = 0 to 10 { + for %i = 0 to 10 { + for %j = 0 to 10 { // CHECK: affine.if [[SET_EMPTY_2D]](%i0, %i1) affine.if (d0, d1) : (d0 - d1 >= 0, d1 - d0 - 1 >= 0)(%i, %j) { "foo"() : () -> () @@ -198,8 +198,8 @@ func @test_empty_set(%N : index) { } } // The tests below test GCDTightenInequalities(). - affine.for %k = 0 to 10 { - affine.for %l = 0 to 10 { + for %k = 0 to 10 { + for %l = 0 to 10 { // Empty because no multiple of 8 lies between 4 and 7. // CHECK: affine.if [[SET_EMPTY_1D]](%i2) affine.if (d0) : (8*d0 - 4 >= 0, -8*d0 + 7 >= 0)(%k) { @@ -226,7 +226,7 @@ func @test_empty_set(%N : index) { } } - affine.for %m = 0 to 10 { + for %m = 0 to 10 { // CHECK: affine.if [[SET_EMPTY_1D]](%i{{[0-9]+}}) affine.if (d0) : (d0 mod 2 - 3 == 0) (%m) { "foo"() : () -> () diff --git a/mlir/test/Transforms/strip-debuginfo.mlir b/mlir/test/Transforms/strip-debuginfo.mlir index 181481279d0..fdabd5d12e0 100644 --- a/mlir/test/Transforms/strip-debuginfo.mlir +++ b/mlir/test/Transforms/strip-debuginfo.mlir @@ -10,7 +10,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) { %1 = "foo"() : () -> i32 loc("foo") // CHECK: } loc(unknown) - affine.for %i0 = 0 to 8 { + for %i0 = 0 to 8 { } loc(fused["foo", "mysource.cc":10:8]) // CHECK: } loc(unknown) diff --git a/mlir/test/Transforms/unroll-jam.mlir b/mlir/test/Transforms/unroll-jam.mlir index 98d284aeede..da4f965676f 100644 --- a/mlir/test/Transforms/unroll-jam.mlir +++ b/mlir/test/Transforms/unroll-jam.mlir @@ -7,13 +7,13 @@ // CHECK-LABEL: func @unroll_jam_imperfect_nest() { func @unroll_jam_imperfect_nest() { // CHECK: %c100 = constant 100 : index - // CHECK-NEXT: affine.for %i0 = 0 to 99 step 2 { - affine.for %i = 0 to 101 { + // CHECK-NEXT: for %i0 = 0 to 99 step 2 { + for %i = 0 to 101 { // CHECK: %0 = "addi32"(%i0, %i0) : (index, index) -> i32 // CHECK-NEXT: %1 = affine.apply [[MAP_PLUS_1]](%i0) // CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32 %x = "addi32"(%i, %i) : (index, index) -> i32 - affine.for %j = 0 to 17 { + for %j = 0 to 17 { // CHECK: %3 = "addi32"(%i0, %i0) : (index, index) -> i32 // CHECK-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32 // CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_1]](%i0) @@ -29,7 +29,7 @@ func @unroll_jam_imperfect_nest() { } // CHECK } // cleanup loop (single iteration) // CHECK: %11 = "addi32"(%c100, %c100) : (index, index) -> i32 - // CHECK-NEXT: affine.for %i2 = 0 to 17 { + // CHECK-NEXT: for %i2 = 0 to 17 { // CHECK-NEXT: %12 = "addi32"(%c100, %c100) : (index, index) -> i32 // CHECK-NEXT: %13 = "addi32"(%12, %12) : (i32, i32) -> i32 // CHECK-NEXT: } @@ -39,8 +39,8 @@ func @unroll_jam_imperfect_nest() { // UNROLL-BY-4-LABEL: func @loop_nest_unknown_count_1(%arg0: index) { func @loop_nest_unknown_count_1(%N : index) { - // UNROLL-BY-4-NEXT: affine.for %i0 = 1 to #map{{[0-9]+}}()[%arg0] step 4 { - // UNROLL-BY-4-NEXT: affine.for %i1 = 1 to 100 { + // UNROLL-BY-4-NEXT: for %i0 = 1 to #map{{[0-9]+}}()[%arg0] step 4 { + // UNROLL-BY-4-NEXT: for %i1 = 1 to 100 { // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32 @@ -48,14 +48,14 @@ func 
@loop_nest_unknown_count_1(%N : index) { // UNROLL-BY-4-NEXT: } // UNROLL-BY-4-NEXT: } // A cleanup loop should be generated here. - // UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 { - // UNROLL-BY-4-NEXT: affine.for %i3 = 1 to 100 { + // UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 { + // UNROLL-BY-4-NEXT: for %i3 = 1 to 100 { // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32 // UNROLL-BY-4_NEXT: } // UNROLL-BY-4_NEXT: } // Specify the lower bound in a form so that both lb and ub operands match. - affine.for %i = ()[s0] -> (1)()[%N] to %N { - affine.for %j = 1 to 100 { + for %i = ()[s0] -> (1)()[%N] to %N { + for %j = 1 to 100 { %x = "foo"() : () -> i32 } } @@ -64,8 +64,8 @@ func @loop_nest_unknown_count_1(%N : index) { // UNROLL-BY-4-LABEL: func @loop_nest_unknown_count_2(%arg0: index) { func @loop_nest_unknown_count_2(%arg : index) { - // UNROLL-BY-4-NEXT: affine.for %i0 = %arg0 to #map{{[0-9]+}}()[%arg0] step 4 { - // UNROLL-BY-4-NEXT: affine.for %i1 = 1 to 100 { + // UNROLL-BY-4-NEXT: for %i0 = %arg0 to #map{{[0-9]+}}()[%arg0] step 4 { + // UNROLL-BY-4-NEXT: for %i1 = 1 to 100 { // UNROLL-BY-4-NEXT: %0 = "foo"(%i0) : (index) -> i32 // UNROLL-BY-4-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0) // UNROLL-BY-4-NEXT: %2 = "foo"(%1) : (index) -> i32 @@ -77,12 +77,12 @@ func @loop_nest_unknown_count_2(%arg : index) { // UNROLL-BY-4-NEXT: } // The cleanup loop is a single iteration one and is promoted. // UNROLL-BY-4-NEXT: %7 = affine.apply [[M1:#map{{[0-9]+}}]]()[%arg0] - // UNROLL-BY-4-NEXT: affine.for %i3 = 1 to 100 { + // UNROLL-BY-4-NEXT: for %i3 = 1 to 100 { // UNROLL-BY-4-NEXT: %8 = "foo"() : () -> i32 // UNROLL-BY-4_NEXT: } // Specify the lower bound in a form so that both lb and ub operands match. - affine.for %i = ()[s0] -> (s0) ()[%arg] to ()[s0] -> (s0+8) ()[%arg] { - affine.for %j = 1 to 100 { + for %i = ()[s0] -> (s0) ()[%arg] to ()[s0] -> (s0+8) ()[%arg] { + for %j = 1 to 100 { %x = "foo"(%i) : (index) -> i32 } } diff --git a/mlir/test/Transforms/unroll.mlir b/mlir/test/Transforms/unroll.mlir index 013f65367cb..c023561faa8 100644 --- a/mlir/test/Transforms/unroll.mlir +++ b/mlir/test/Transforms/unroll.mlir @@ -46,13 +46,13 @@ // CHECK-LABEL: func @loop_nest_simplest() { func @loop_nest_simplest() { - // CHECK: affine.for %i0 = 0 to 100 step 2 { - affine.for %i = 0 to 100 step 2 { + // CHECK: for %i0 = 0 to 100 step 2 { + for %i = 0 to 100 step 2 { // CHECK: %c1_i32 = constant 1 : i32 // CHECK-NEXT: %c1_i32_0 = constant 1 : i32 // CHECK-NEXT: %c1_i32_1 = constant 1 : i32 // CHECK-NEXT: %c1_i32_2 = constant 1 : i32 - affine.for %j = 0 to 4 { + for %j = 0 to 4 { %x = constant 1 : i32 } } // CHECK: } @@ -62,8 +62,8 @@ func @loop_nest_simplest() { // CHECK-LABEL: func @loop_nest_simple_iv_use() { func @loop_nest_simple_iv_use() { // CHECK: %c0 = constant 0 : index - // CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 { - affine.for %i = 0 to 100 step 2 { + // CHECK-NEXT: for %i0 = 0 to 100 step 2 { + for %i = 0 to 100 step 2 { // CHECK: %0 = "addi32"(%c0, %c0) : (index, index) -> i32 // CHECK: %1 = affine.apply [[MAP0]](%c0) // CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32 @@ -71,7 +71,7 @@ func @loop_nest_simple_iv_use() { // CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32 // CHECK: %5 = affine.apply [[MAP2]](%c0) // CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32 - affine.for %j = 0 to 4 { + for %j = 0 to 4 { %x = "addi32"(%j, %j) : (index, index) -> i32 } } // CHECK: } @@ -82,8 +82,8 @@ func @loop_nest_simple_iv_use() { 
// CHECK-LABEL: func @loop_nest_body_def_use() { func @loop_nest_body_def_use() { // CHECK: %c0 = constant 0 : index - // CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 { - affine.for %i = 0 to 100 step 2 { + // CHECK-NEXT: for %i0 = 0 to 100 step 2 { + for %i = 0 to 100 step 2 { // CHECK: %c0_0 = constant 0 : index %c0 = constant 0 : index // CHECK: %0 = affine.apply [[MAP0]](%c0) @@ -97,7 +97,7 @@ func @loop_nest_body_def_use() { // CHECK-NEXT: %8 = affine.apply [[MAP2]](%c0) // CHECK-NEXT: %9 = affine.apply [[MAP0]](%8) // CHECK-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index - affine.for %j = 0 to 4 { + for %j = 0 to 4 { %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } : (index) -> (index) %y = "addi32"(%x, %c0) : (index, index) -> index @@ -110,14 +110,14 @@ func @loop_nest_body_def_use() { func @loop_nest_strided() { // CHECK: %c2 = constant 2 : index // CHECK-NEXT: %c2_0 = constant 2 : index - // CHECK-NEXT: affine.for %i0 = 0 to 100 { - affine.for %i = 0 to 100 { + // CHECK-NEXT: for %i0 = 0 to 100 { + for %i = 0 to 100 { // CHECK: %0 = affine.apply [[MAP0]](%c2_0) // CHECK-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index // CHECK-NEXT: %2 = affine.apply [[MAP1]](%c2_0) // CHECK-NEXT: %3 = affine.apply [[MAP0]](%2) // CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index - affine.for %j = 2 to 6 step 2 { + for %j = 2 to 6 step 2 { %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } : (index) -> (index) %y = "addi32"(%x, %x) : (index, index) -> index @@ -130,7 +130,7 @@ func @loop_nest_strided() { // CHECK-NEXT: %10 = affine.apply [[MAP3]](%c2) // CHECK-NEXT: %11 = affine.apply [[MAP0]](%10) // CHECK-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index - affine.for %k = 2 to 7 step 2 { + for %k = 2 to 7 step 2 { %z = "affine.apply" (%k) { map: (d0) -> (d0 + 1) } : (index) -> (index) %w = "addi32"(%z, %z) : (index, index) -> index @@ -142,8 +142,8 @@ func @loop_nest_strided() { // CHECK-LABEL: func @loop_nest_multiple_results() { func @loop_nest_multiple_results() { // CHECK: %c0 = constant 0 : index - // CHECK-NEXT: affine.for %i0 = 0 to 100 { - affine.for %i = 0 to 100 { + // CHECK-NEXT: for %i0 = 0 to 100 { + for %i = 0 to 100 { // CHECK: %0 = affine.apply [[MAP4]](%i0, %c0) // CHECK-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index // CHECK-NEXT: %2 = affine.apply #map{{.*}}(%i0, %c0) @@ -153,7 +153,7 @@ func @loop_nest_multiple_results() { // CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index // CHECK-NEXT: %7 = affine.apply #map{{.*}}(%i0, %4) // CHECK-NEXT: %8 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index) - affine.for %j = 0 to 2 step 1 { + for %j = 0 to 2 step 1 { %x = affine.apply (d0, d1) -> (d0 + 1) (%i, %j) %y = "addi32"(%x, %x) : (index, index) -> index %z = affine.apply (d0, d1) -> (d0 + 3) (%i, %j) @@ -170,8 +170,8 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) { // CHECK: %c0 = constant 0 : index // CHECK-NEXT: %c128 = constant 128 : index %c128 = constant 128 : index - // CHECK: affine.for %i0 = 0 to 100 { - affine.for %i = 0 to 100 { + // CHECK: for %i0 = 0 to 100 { + for %i = 0 to 100 { // CHECK: %0 = "vld"(%i0) : (index) -> i32 %ld = "vld"(%i) : (index) -> i32 // CHECK: %1 = affine.apply [[MAP0]](%c0) @@ -189,7 +189,7 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) { // CHECK-NEXT: %13 = affine.apply [[MAP0]](%12) // CHECK-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index // CHECK-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index - affine.for %j = 0 to 4 { + for %j = 0 to 4 { 
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } : (index) -> (index) %y = "vmulf"(%j, %x) : (index, index) -> index @@ -218,7 +218,7 @@ func @loop_nest_seq_multiple() { // CHECK-NEXT: %5 = affine.apply [[MAP2]](%c0_0) // CHECK-NEXT: %6 = affine.apply [[MAP0]](%5) // CHECK-NEXT: "mul"(%6, %6) : (index, index) -> () - affine.for %j = 0 to 4 { + for %j = 0 to 4 { %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } : (index) -> (index) "mul"(%x, %x) : (index, index) -> () @@ -226,8 +226,8 @@ func @loop_nest_seq_multiple() { // CHECK: %c99 = constant 99 : index %k = "constant"(){value: 99} : () -> index - // CHECK: affine.for %i0 = 0 to 100 step 2 { - affine.for %m = 0 to 100 step 2 { + // CHECK: for %i0 = 0 to 100 step 2 { + for %m = 0 to 100 step 2 { // CHECK: %7 = affine.apply [[MAP0]](%c0) // CHECK-NEXT: %8 = affine.apply [[MAP6]](%c0)[%c99] // CHECK-NEXT: %9 = affine.apply [[MAP0]](%c0) @@ -239,7 +239,7 @@ func @loop_nest_seq_multiple() { // CHECK-NEXT: %15 = affine.apply [[MAP2]](%c0) // CHECK-NEXT: %16 = affine.apply [[MAP0]](%15) // CHECK-NEXT: %17 = affine.apply [[MAP6]](%15)[%c99] - affine.for %n = 0 to 4 { + for %n = 0 to 4 { %y = "affine.apply" (%n) { map: (d0) -> (d0 + 1) } : (index) -> (index) %z = "affine.apply" (%n, %k) { map: (d0) [s0] -> (d0 + s0 + 1) } : @@ -251,16 +251,16 @@ func @loop_nest_seq_multiple() { // SHORT-LABEL: func @loop_nest_outer_unroll() { func @loop_nest_outer_unroll() { - // SHORT: affine.for %i0 = 0 to 4 { + // SHORT: for %i0 = 0 to 4 { // SHORT-NEXT: %0 = affine.apply [[MAP0]](%i0) // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index // SHORT-NEXT: } - // SHORT-NEXT: affine.for %i1 = 0 to 4 { + // SHORT-NEXT: for %i1 = 0 to 4 { // SHORT-NEXT: %2 = affine.apply [[MAP0]](%i1) // SHORT-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> index // SHORT-NEXT: } - affine.for %i = 0 to 2 { - affine.for %j = 0 to 4 { + for %i = 0 to 2 { + for %j = 0 to 4 { %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } : (index) -> (index) %y = "addi32"(%x, %x) : (index, index) -> index @@ -284,28 +284,28 @@ func @loop_nest_seq_long() -> i32 { %zero_idx = constant 0 : index - affine.for %n0 = 0 to 512 { - affine.for %n1 = 0 to 8 { + for %n0 = 0 to 512 { + for %n1 = 0 to 8 { store %one, %A[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> store %two, %B[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> } } - affine.for %i0 = 0 to 2 { - affine.for %i1 = 0 to 2 { - affine.for %i2 = 0 to 8 { + for %i0 = 0 to 2 { + for %i1 = 0 to 2 { + for %i2 = 0 to 8 { %b2 = "affine.apply" (%i1, %i2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index %x = load %B[%i0, %b2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> "op1"(%x) : (i32) -> () } - affine.for %j1 = 0 to 8 { - affine.for %j2 = 0 to 8 { + for %j1 = 0 to 8 { + for %j2 = 0 to 8 { %a2 = "affine.apply" (%i1, %j2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index %v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> "op2"(%v203) : (i32) -> () } - affine.for %k2 = 0 to 8 { + for %k2 = 0 to 8 { %s0 = "op3"() : () -> i32 %c2 = "affine.apply" (%i0, %k2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index %s1 = load %C[%j1, %c2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> @@ -322,8 +322,8 @@ func @loop_nest_seq_long() -> i32 { // UNROLL-BY-4-LABEL: func @unroll_unit_stride_no_cleanup() { func @unroll_unit_stride_no_cleanup() { - // UNROLL-BY-4: affine.for %i0 = 0 to 100 { - 
affine.for %i = 0 to 100 { + // UNROLL-BY-4: for %i0 = 0 to 100 { + for %i = 0 to 100 { // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 { // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32 @@ -337,13 +337,13 @@ func @unroll_unit_stride_no_cleanup() { // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32 // UNROLL-BY-4-NEXT: } - affine.for %j = 0 to 8 { + for %j = 0 to 8 { %x = "addi32"(%j, %j) : (index, index) -> i32 %y = "addi32"(%x, %x) : (i32, i32) -> i32 } // empty loop - // UNROLL-BY-4: affine.for %i2 = 0 to 8 { - affine.for %k = 0 to 8 { + // UNROLL-BY-4: for %i2 = 0 to 8 { + for %k = 0 to 8 { } } return @@ -351,8 +351,8 @@ func @unroll_unit_stride_no_cleanup() { // UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() { func @unroll_unit_stride_cleanup() { - // UNROLL-BY-4: affine.for %i0 = 0 to 100 { - affine.for %i = 0 to 100 { + // UNROLL-BY-4: for %i0 = 0 to 100 { + for %i = 0 to 100 { // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 7 step 4 { // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32 @@ -370,7 +370,7 @@ func @unroll_unit_stride_cleanup() { // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32 // UNROLL-BY-4-NEXT: } - affine.for %j = 0 to 10 { + for %j = 0 to 10 { %x = "addi32"(%j, %j) : (index, index) -> i32 %y = "addi32"(%x, %x) : (i32, i32) -> i32 } @@ -380,8 +380,8 @@ func @unroll_unit_stride_cleanup() { // UNROLL-BY-4-LABEL: func @unroll_non_unit_stride_cleanup() { func @unroll_non_unit_stride_cleanup() { - // UNROLL-BY-4: affine.for %i0 = 0 to 100 { - affine.for %i = 0 to 100 { + // UNROLL-BY-4: for %i0 = 0 to 100 { + for %i = 0 to 100 { // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 2 to 37 step 20 { // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32 @@ -399,7 +399,7 @@ func @unroll_non_unit_stride_cleanup() { // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32 // UNROLL-BY-4-NEXT: } - affine.for %j = 2 to 48 step 5 { + for %j = 2 to 48 step 5 { %x = "addi32"(%j, %j) : (index, index) -> i32 %y = "addi32"(%x, %x) : (i32, i32) -> i32 } @@ -411,8 +411,8 @@ func @unroll_non_unit_stride_cleanup() { func @loop_nest_single_iteration_after_unroll(%N: index) { // UNROLL-BY-4: %c0 = constant 0 : index // UNROLL-BY-4: %c4 = constant 4 : index - // UNROLL-BY-4: affine.for %i0 = 0 to %arg0 { - affine.for %i = 0 to %N { + // UNROLL-BY-4: for %i0 = 0 to %arg0 { + for %i = 0 to %N { // UNROLL-BY-4: %0 = "addi32"(%c0, %c0) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %1 = affine.apply [[MAP0]](%c0) // UNROLL-BY-4-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32 @@ -422,7 +422,7 @@ func @loop_nest_single_iteration_after_unroll(%N: index) { // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %7 = "addi32"(%c4, %c4) : (index, index) -> i32 // UNROLL-BY-4-NOT: for - affine.for %j = 0 to 5 { + for %j = 0 to 5 { %x = "addi32"(%j, %j) : (index, index) -> i32 } // UNROLL-BY-4-NOT: } } // UNROLL-BY-4: } @@ -434,8 +434,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) { // No cleanup will be generated here. 
// UNROLL-BY-4-LABEL: func @loop_nest_operand1() { func @loop_nest_operand1() { -// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 { -// UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP10]](%i0) to #map{{[0-9]+}}(%i0) step 4 +// UNROLL-BY-4: for %i0 = 0 to 100 step 2 { +// UNROLL-BY-4-NEXT: for %i1 = [[MAP10]](%i0) to #map{{[0-9]+}}(%i0) step 4 // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32 @@ -443,8 +443,8 @@ func @loop_nest_operand1() { // UNROLL-BY-4-NEXT: } // UNROLL-BY-4-NEXT: } // UNROLL-BY-4-NEXT: return - affine.for %i = 0 to 100 step 2 { - affine.for %j = (d0) -> (0) (%i) to (d0) -> (d0 - d0 mod 4) (%i) { + for %i = 0 to 100 step 2 { + for %j = (d0) -> (0) (%i) to (d0) -> (d0 - d0 mod 4) (%i) { %x = "foo"() : () -> i32 } } @@ -454,8 +454,8 @@ func @loop_nest_operand1() { // No cleanup will be generated here. // UNROLL-BY-4-LABEL: func @loop_nest_operand2() { func @loop_nest_operand2() { -// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 { -// UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 { +// UNROLL-BY-4: for %i0 = 0 to 100 step 2 { +// UNROLL-BY-4-NEXT: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 { // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32 @@ -463,8 +463,8 @@ func @loop_nest_operand2() { // UNROLL-BY-4-NEXT: } // UNROLL-BY-4-NEXT: } // UNROLL-BY-4-NEXT: return - affine.for %i = 0 to 100 step 2 { - affine.for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) { + for %i = 0 to 100 step 2 { + for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) { %x = "foo"() : () -> i32 } } @@ -475,16 +475,16 @@ func @loop_nest_operand2() { // factor. The cleanup loop happens to be a single iteration one and is promoted. // UNROLL-BY-4-LABEL: func @loop_nest_operand3() { func @loop_nest_operand3() { - // UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 { - affine.for %i = 0 to 100 step 2 { - // UNROLL-BY-4: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 { + // UNROLL-BY-4: for %i0 = 0 to 100 step 2 { + for %i = 0 to 100 step 2 { + // UNROLL-BY-4: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 { // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: } // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32 - affine.for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) { + for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) { %x = "foo"() : () -> i32 } } // UNROLL-BY-4: } @@ -493,20 +493,20 @@ func @loop_nest_operand3() { // UNROLL-BY-4-LABEL: func @loop_nest_operand4(%arg0: index) { func @loop_nest_operand4(%N : index) { - // UNROLL-BY-4: affine.for %i0 = 0 to 100 { - affine.for %i = 0 to 100 { - // UNROLL-BY-4: affine.for %i1 = [[MAP12]]()[%arg0] to #map{{[0-9]+}}()[%arg0] step 4 { + // UNROLL-BY-4: for %i0 = 0 to 100 { + for %i = 0 to 100 { + // UNROLL-BY-4: for %i1 = [[MAP12]]()[%arg0] to #map{{[0-9]+}}()[%arg0] step 4 { // UNROLL-BY-4: %0 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: } // A cleanup loop will be be generated here. 
- // UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 { + // UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 { // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32 // UNROLL-BY-4_NEXT: } // Specify the lower bound so that both lb and ub operands match. - affine.for %j = ()[s0] -> (0)()[%N] to %N { + for %j = ()[s0] -> (0)()[%N] to %N { %x = "foo"() : () -> i32 } } @@ -518,7 +518,7 @@ func @loop_nest_unroll_full() { // CHECK-NEXT: %0 = "foo"() : () -> i32 // CHECK-NEXT: %1 = "bar"() : () -> i32 // CHECK-NEXT: return - affine.for %i = 0 to 1 { + for %i = 0 to 1 { %x = "foo"() : () -> i32 %y = "bar"() : () -> i32 } @@ -527,7 +527,7 @@ func @loop_nest_unroll_full() { // UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop() func @unroll_by_one_should_promote_single_iteration_loop() { - affine.for %i = 0 to 1 { + for %i = 0 to 1 { %x = "foo"(%i) : (index) -> i32 } return diff --git a/mlir/utils/emacs/mlir-mode.el b/mlir/utils/emacs/mlir-mode.el index 8918890b8be..efc61cbe92a 100644 --- a/mlir/utils/emacs/mlir-mode.el +++ b/mlir/utils/emacs/mlir-mode.el @@ -42,7 +42,7 @@ ;; Keywords `(,(regexp-opt '(;; Toplevel entities - "br" "ceildiv" "func" "cond_br" "else" "extfunc" "false" "floordiv" "affine.for" "affine.if" "mod" "return" "size" "step" "to" "true" "??" ) 'symbols) . font-lock-keyword-face)) + "br" "ceildiv" "func" "cond_br" "else" "extfunc" "false" "floordiv" "for" "affine.if" "mod" "return" "size" "step" "to" "true" "??" ) 'symbols) . font-lock-keyword-face)) "Syntax highlighting for MLIR.") ;; Emacs 23 compatibility. diff --git a/mlir/utils/vim/mlir.vim b/mlir/utils/vim/mlir.vim index 0e2797f5603..91478d62136 100644 --- a/mlir/utils/vim/mlir.vim +++ b/mlir/utils/vim/mlir.vim @@ -10,9 +10,9 @@ syn keyword mlirType index i1 i2 i4 i8 i13 i16 i32 i64 \ f16 f32 tf_control syn keyword mlirType memref tensor vector -syntax keyword mlirKeywords extfunc func to step return +syntax keyword mlirKeywords extfunc cfgfunc mlfunc for to step return syntax keyword mlirConditional affine.if else -syntax keyword mlirCoreOps dim addf addi subf subi mulf muli cmpi select constant affine.apply affine.for call call_indirect extract_element getTensor memref_cast tensor_cast load store alloc dealloc dma_start dma_wait +syntax keyword mlirCoreOps dim addf addi subf subi mulf muli cmpi select constant affine.apply call call_indirect extract_element getTensor memref_cast tensor_cast load store alloc dealloc dma_start dma_wait syn match mlirInt "-\=\<\d\+\>" syn match mlirFloat "-\=\<\d\+\.\d\+\>" |
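The test updates above all exercise the plain `for` loop form together with `affine.apply`, map or shorthand bounds, and an optional `step`. As a quick reference, here is a minimal sketch of that surface syntax; the function name `@sketch` and the `"foo"` operation are hypothetical and are not taken from any file touched by this change:

```mlir
// Illustrative sketch only -- not part of this commit. Names such as
// @sketch and "foo" are hypothetical.
func @sketch(%N : index) {
  for %i = 0 to %N step 2 {
    // Lower bound given as an affine map, upper bound as a constant;
    // omitting `step` defaults it to 1.
    for %j = (d0) -> (d0) (%i) to 100 {
      %k = affine.apply (d0, d1) -> (d0 + d1) (%i, %j)
      "foo"(%k) : (index) -> ()
    }
  }
  return
}
```

This is the loop syntax the CHECK lines in the tests above match after the rollback.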