10 files changed, 108 insertions, 67 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
index 2a3d3887ed6..56a9e7022db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
@@ -14,7 +14,7 @@ regBankSelected: true
 
 # GCN: global_addrspace
 # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
-# GCN: FLAT_LOAD_DWORD  [[PTR]], 0, 0
+# GCN: FLAT_LOAD_DWORD  [[PTR]], 0, 0, 0
 
 body: |
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
index 89be3bde94a..ea435725bf2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
@@ -15,7 +15,7 @@ regBankSelected: true
 # GCN: global_addrspace
 # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
 # GCN: [[VAL:%[0-9]+]] = COPY %vgpr2
-# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0
+# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0
 
 body: |
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
index bc992ed77ff..62b47beb125 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -219,19 +219,19 @@ body:             |
     %34 = V_MOV_B32_e32 63, implicit %exec
 
     %27 = V_AND_B32_e64 %26, %24, implicit %exec
-    FLAT_STORE_DWORD %37, %27, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %28 = V_AND_B32_e64 %24, %26, implicit %exec
-    FLAT_STORE_DWORD %37, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %29 = V_AND_B32_e32 %26, %24, implicit %exec
-    FLAT_STORE_DWORD %37, %29, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %30 = V_AND_B32_e64 %26, %26, implicit %exec
-    FLAT_STORE_DWORD %37, %30, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %31 = V_AND_B32_e64 %34, %34, implicit %exec
-    FLAT_STORE_DWORD %37, %31, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     S_ENDPGM
 
@@ -407,34 +407,34 @@ body:             |
     %27 = S_MOV_B32 -4
 
     %11 = V_LSHLREV_B32_e64 12, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %12 = V_LSHLREV_B32_e64 %7, 12, implicit %exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %13 = V_LSHL_B32_e64 %7, 12, implicit %exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %14 = V_LSHL_B32_e64 12, %7, implicit %exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %15 = V_LSHL_B32_e64 12, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %22 = V_LSHL_B32_e64 %6, 12, implicit %exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %23 = V_LSHL_B32_e64 %6, 32, implicit %exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %25 = V_LSHL_B32_e32 %6, %6, implicit %exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %26 = V_LSHLREV_B32_e32 11, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %28 = V_LSHL_B32_e32 %27, %6, implicit %exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     S_ENDPGM
 
@@ -615,34 +615,34 @@ body:             |
     %35 = V_MOV_B32_e32 2, implicit %exec
 
     %11 = V_ASHRREV_I32_e64 8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %12 = V_ASHRREV_I32_e64 %8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %13 = V_ASHR_I32_e64 %7, 3, implicit %exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %14 = V_ASHR_I32_e64 7, %32, implicit %exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %15 = V_ASHR_I32_e64 %27, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %22 = V_ASHR_I32_e64 %6, 4, implicit %exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %23 = V_ASHR_I32_e64 %6, %33, implicit %exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %25 = V_ASHR_I32_e32 %34, %34, implicit %exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %26 = V_ASHRREV_I32_e32 11, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %28 = V_ASHR_I32_e32 %27, %35, implicit %exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     S_ENDPGM
 
@@ -824,34 +824,34 @@ body:             |
     %35 = V_MOV_B32_e32 2, implicit %exec
 
     %11 = V_LSHRREV_B32_e64 8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %12 = V_LSHRREV_B32_e64 %8, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %13 = V_LSHR_B32_e64 %7, 3, implicit %exec
-    FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %14 = V_LSHR_B32_e64 7, %32, implicit %exec
-    FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %15 = V_LSHR_B32_e64 %27, %24, implicit %exec
-    FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %22 = V_LSHR_B32_e64 %6, 4, implicit %exec
-    FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %23 = V_LSHR_B32_e64 %6, %33, implicit %exec
-    FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %25 = V_LSHR_B32_e32 %34, %34, implicit %exec
-    FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %26 = V_LSHRREV_B32_e32 11, %10, implicit %exec
-    FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     %28 = V_LSHR_B32_e32 %27, %35, implicit %exec
-    FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
+    FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out)
 
     S_ENDPGM
 
diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
index ff9fcd1c693..c6fe6debd22 100644
--- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -246,15 +246,15 @@ body: |
     S_BRANCH %bb.1
 
   bb.1:
-    FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
     %vgpr3 = V_MOV_B32_e32 0, implicit %exec
-    FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
     %vgpr3 = V_MOV_B32_e32 0, implicit %exec
-    FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
     %vgpr3 = V_MOV_B32_e32 0, implicit %exec
-    FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, implicit %exec, implicit %flat_scr
+    FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
     %vgpr3 = V_MOV_B32_e32 0, implicit %exec
-    FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, implicit %exec, implicit %flat_scr
+    FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
     %vgpr3 = V_MOV_B32_e32 0, implicit %exec
     S_ENDPGM
 
diff --git a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
index 7d6d8a5891c..d6b3d7b14cd 100644
--- a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
+++ b/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
@@ -57,15 +57,15 @@ body:             |
     %4.sub1 = COPY %3.sub0
     undef %5.sub0 = COPY %4.sub1
     %5.sub1 = COPY %4.sub0
-    FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, 0, implicit %exec, implicit %flat_scr
 
     %6 = IMPLICIT_DEF
     undef %7.sub0_sub1 = COPY %6
     %7.sub2 = COPY %3.sub0
-    FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, 0, implicit %exec, implicit %flat_scr
 
     %8 = IMPLICIT_DEF
     undef %9.sub0_sub1_sub2 = COPY %8
     %9.sub3 = COPY %3.sub0
-    FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, 0, implicit %exec, implicit %flat_scr
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
index 1a0d68d81f9..31024277871 100644
--- a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
+++ b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
@@ -58,12 +58,12 @@ body:             |
 
   bb.3:
     %1 = COPY killed %17
-    FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, implicit %exec, implicit %flat_scr
     %14 = COPY %1.sub1
     %16 = COPY killed %1.sub0
     undef %15.sub0 = COPY killed %16
     %15.sub1 = COPY killed %14
-    FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, implicit %exec, implicit %flat_scr
     S_ENDPGM
 
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
index cd0d410368c..ba937c927c7 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
@@ -214,26 +214,26 @@ body:             |
     %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc
     %16 = REG_SEQUENCE %14, 1, %15, 2
     %18 = COPY %16
-    %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
+    %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
     %60 = V_BFE_U32 %17, 8, 8, implicit %exec
     %61 = V_LSHLREV_B32_e32 2, killed %60, implicit %exec
     %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec
     %66 = COPY %13
     %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
     %67 = REG_SEQUENCE %70, 1, killed %65, 2
-    FLAT_STORE_DWORD %67, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
+    FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
     %37 = S_ADD_U32 %14, 4, implicit-def %scc
     %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc
     %71 = COPY killed %37
     %72 = COPY killed %38
     %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
-    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
+    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
     %73 = V_BFE_U32 %40, 8, 8, implicit %exec
     %74 = V_LSHLREV_B32_e32 2, killed %73, implicit %exec
     %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def %vcc, implicit %exec
     %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
     %80 = REG_SEQUENCE %83, 1, killed %78, 2
-    FLAT_STORE_DWORD %80, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
+    FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
     %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc
     %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc
     %57 = REG_SEQUENCE %55, 1, killed %56, 2
@@ -377,26 +377,26 @@ body:             |
     %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc
     %16 = REG_SEQUENCE %14, 1, %15, 2
     %18 = COPY %16
-    %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
+    %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45)
     %60 = V_BFE_U32 %17, 8, 8, implicit %exec
     %61 = V_LSHLREV_B32_e32 %84, killed %60, implicit %exec
     %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec
     %66 = COPY %13
     %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
     %67 = REG_SEQUENCE %70, 1, killed %65, 2
-    FLAT_STORE_DWORD %67, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
+    FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9)
     %37 = S_ADD_U32 %14, 4, implicit-def %scc
     %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc
     %71 = COPY killed %37
     %72 = COPY killed %38
     %41 = REG_SEQUENCE killed %71, 1, killed %72, 2
-    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
+    %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep)
     %73 = V_BFE_U32 %40, 8, 8, implicit %exec
     %74 = V_LSHLREV_B32_e32 %84, killed %73, implicit %exec
     %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def %vcc, implicit %exec
     %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec
     %80 = REG_SEQUENCE %83, 1, killed %78, 2
-    FLAT_STORE_DWORD %80, %30, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
+    FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17)
     %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc
     %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc
     %57 = REG_SEQUENCE %55, 1, killed %56, 2
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir
index f754415dccb..38662e83b35 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir
@@ -51,21 +51,21 @@ name: flat_zero_waitcnt
 body: |
   bb.0:
     successors: %bb.1
-    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4)
-    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
+    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4)
+    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
     %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
     S_BRANCH %bb.1
 
   bb.1:
     successors: %bb.2
-    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr
-    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
+    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
+    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
     %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
     S_BRANCH %bb.2
 
   bb.2:
-    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4)
-    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16)
+    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4)
+    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16)
     %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
     S_ENDPGM
 ...
@@ -86,11 +86,11 @@ name: single_fallthrough_successor_no_end_block_wait
 body: |
   bb.0:
     successors: %bb.1
-    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr
+    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
 
   bb.1:
     %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
-    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
     S_ENDPGM
 ...
 ---
@@ -114,15 +114,15 @@ name: single_branch_successor_not_next_block
 body: |
   bb.0:
     successors: %bb.2
-    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr
+    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
    S_BRANCH %bb.2
 
   bb.1:
-    FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, 0, implicit %exec, implicit %flat_scr
     S_ENDPGM
 
   bb.2:
      %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
-    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, implicit %exec, implicit %flat_scr
+    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
     S_ENDPGM
 ...
diff --git a/llvm/test/MC/AMDGPU/flat-gfx9.s b/llvm/test/MC/AMDGPU/flat-gfx9.s
new file mode 100644
index 00000000000..5f93a7371b8
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/flat-gfx9.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
+
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s
+
+
+flat_load_dword v1, v[3:4] offset:0
+// GCN: flat_load_dword v1, v[3:4]      ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x00,0x01]
+
+flat_load_dword v1, v[3:4] offset:-1
+// GCN-ERR: :35: error: failed parsing operand.
+
+// FIXME: Error on VI in wrong column
+flat_load_dword v1, v[3:4] offset:4095
+// GFX9: flat_load_dword v1, v[3:4] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x03,0x00,0x00,0x01]
+// VIERR: :1: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4] offset:4096
+// GCNERR: :28: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4] offset:4 glc
+// GFX9: flat_load_dword v1, v[3:4] offset:4 glc ; encoding: [0x04,0x00,0x51,0xdc,0x03,0x00,0x00,0x01]
+// VIERR: :1: error: invalid operand for instruction
+
+flat_load_dword v1, v[3:4] offset:4 glc slc
+// GFX9: flat_load_dword v1, v[3:4] offset:4 glc slc ; encoding: [0x04,0x00,0x53,0xdc,0x03,0x00,0x00,0x01]
+// VIERR: :1: error: invalid operand for instruction
+
+flat_atomic_add v[3:4], v5 offset:8 slc
+// GFX9: flat_atomic_add v[3:4], v5 offset:8 slc ; encoding: [0x08,0x00,0x0a,0xdd,0x03,0x05,0x00,0x00]
+// VIERR: :1: error: invalid operand for instruction
+
+flat_atomic_swap v[3:4], v5 offset:16
+// GFX9: flat_atomic_swap v[3:4], v5 offset:16 ; encoding: [0x10,0x00,0x00,0xdd,0x03,0x05,0x00,0x00]
+// VIERR: :1: error: invalid operand for instruction
+
+flat_store_dword v[3:4], v1 offset:16
+// GFX9: flat_store_dword v[3:4], v1 offset:16 ; encoding: [0x10,0x00,0x70,0xdc,0x03,0x01,0x00,0x00]
+// VIERR: :1: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/flat.s b/llvm/test/MC/AMDGPU/flat.s
index 4e81799fe9f..d8cad131d1e 100644
--- a/llvm/test/MC/AMDGPU/flat.s
+++ b/llvm/test/MC/AMDGPU/flat.s
@@ -49,9 +49,10 @@ flat_store_dword v[3:4], v1 slc
 
 // FIXME: For atomic instructions, glc must be placed immediately following
 // the data regiser.  These forms aren't currently supported:
+// FIXME: offset:0 required
 // flat_atomic_add v1, v[3:4], v5 slc glc
 
-flat_atomic_add v1 v[3:4], v5 glc slc
+flat_atomic_add v1, v[3:4], v5 offset:0 glc slc
 // NOSI: error:
 // CI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x00,0x01]
 // VI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x00,0x01]