; llvm/test/CodeGen/X86/misched-matrix.ll

; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
; RUN:          -misched-topdown -verify-machineinstrs \
; RUN:     | FileCheck %s -check-prefix=TOPDOWN
;
; Verify that the MI scheduler minimizes register pressure for a
; uniform set of bottom-up subtrees (unrolled matrix multiply).
;
; For the current top-down heuristics, ensure that some imull instructions
; with folded memory operands have been reordered with respect to the stores.
; This tests the scheduler's ability to do cheap alias analysis (which doesn't
; require any AliasAnalysis pass).
;
; TOPDOWN: %for.body
; TOPDOWN: movl %{{.*}}, (
; TOPDOWN: imull {{[0-9]*}}(
; TOPDOWN: movl %{{.*}}, 4(
; TOPDOWN: imull {{[0-9]*}}(
; TOPDOWN: movl %{{.*}}, 8(
; TOPDOWN: movl %{{.*}}, 12(
; TOPDOWN: %for.end

; mmult: a loop over the row index i (0..3) whose body is fully unrolled over
; the column index j and the inner-product index k:
;   m3[i][j] = sum over k in 0..3 of m1[i][k] * m2[k][j]
; %m1, %m2 and %m3 each point to rows of four i32 and are all marked noalias.
; The body is laid out as: all loads, then all multiplies, then all adds, then
; the four stores. NOTE(review): the test is run with a source-order pre-RA
; scheduler, so this instruction order is presumably significant -- do not
; reorder without re-checking the expected output patterns.
define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2,
[4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {
entry:
  br label %for.body

for.body:                              ; preds = %for.body, %entry
  ; %indvars.iv is the row index i: 0 on entry, then the incremented value.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Operands for output column 0: m1[i][k] and m2[k][0], k = 0..3.
  %arrayidx8 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 0
  %tmp = load i32* %arrayidx8, align 4, !tbaa !0
  %arrayidx12 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 0
  %tmp1 = load i32* %arrayidx12, align 4, !tbaa !0
  %arrayidx8.1 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 1
  %tmp2 = load i32* %arrayidx8.1, align 4, !tbaa !0
  %arrayidx12.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 0
  %tmp3 = load i32* %arrayidx12.1, align 4, !tbaa !0
  %arrayidx8.2 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 2
  %tmp4 = load i32* %arrayidx8.2, align 4, !tbaa !0
  %arrayidx12.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 0
  %tmp5 = load i32* %arrayidx12.2, align 4, !tbaa !0
  %arrayidx8.3 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 3
  %tmp6 = load i32* %arrayidx8.3, align 4, !tbaa !0
  ; Address of m2[3][0]; the load through it (%tmp7) is issued after all the
  ; other loads below.
  %arrayidx12.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 0
  ; Operands for output column 1: m1[i][k] is loaded again through the same
  ; %arrayidx8* pointers, paired with m2[k][1].
  %tmp8 = load i32* %arrayidx8, align 4, !tbaa !0
  %arrayidx12.137 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 1
  %tmp9 = load i32* %arrayidx12.137, align 4, !tbaa !0
  %tmp10 = load i32* %arrayidx8.1, align 4, !tbaa !0
  %arrayidx12.1.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 1
  %tmp11 = load i32* %arrayidx12.1.1, align 4, !tbaa !0
  %tmp12 = load i32* %arrayidx8.2, align 4, !tbaa !0
  %arrayidx12.2.1 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 1
  %tmp13 = load i32* %arrayidx12.2.1, align 4, !tbaa !0
  %tmp14 = load i32* %arrayidx8.3, align 4, !tbaa !0
  %arrayidx12.3.1 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 1
  %tmp15 = load i32* %arrayidx12.3.1, align 4, !tbaa !0
  ; Operands for output column 2: m1[i][k] again, paired with m2[k][2].
  %tmp16 = load i32* %arrayidx8, align 4, !tbaa !0
  %arrayidx12.239 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 2
  %tmp17 = load i32* %arrayidx12.239, align 4, !tbaa !0
  %tmp18 = load i32* %arrayidx8.1, align 4, !tbaa !0
  %arrayidx12.1.2 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 2
  %tmp19 = load i32* %arrayidx12.1.2, align 4, !tbaa !0
  %tmp20 = load i32* %arrayidx8.2, align 4, !tbaa !0
  %arrayidx12.2.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 2
  %tmp21 = load i32* %arrayidx12.2.2, align 4, !tbaa !0
  %tmp22 = load i32* %arrayidx8.3, align 4, !tbaa !0
  %arrayidx12.3.2 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 2
  %tmp23 = load i32* %arrayidx12.3.2, align 4, !tbaa !0
  ; Operands for output column 3: m1[i][k] again, paired with m2[k][3].
  %tmp24 = load i32* %arrayidx8, align 4, !tbaa !0
  %arrayidx12.341 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 3
  %tmp25 = load i32* %arrayidx12.341, align 4, !tbaa !0
  %tmp26 = load i32* %arrayidx8.1, align 4, !tbaa !0
  %arrayidx12.1.3 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 3
  %tmp27 = load i32* %arrayidx12.1.3, align 4, !tbaa !0
  %tmp28 = load i32* %arrayidx8.2, align 4, !tbaa !0
  %arrayidx12.2.3 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 3
  %tmp29 = load i32* %arrayidx12.2.3, align 4, !tbaa !0
  %tmp30 = load i32* %arrayidx8.3, align 4, !tbaa !0
  %arrayidx12.3.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 3
  %tmp31 = load i32* %arrayidx12.3.3, align 4, !tbaa !0
  ; Last load: m2[3][0], the remaining column-0 operand (used by %mul.3).
  %tmp7 = load i32* %arrayidx12.3, align 4, !tbaa !0
  ; Sixteen products m2[k][j] * m1[i][k], grouped four per output column j.
  %mul = mul nsw i32 %tmp1, %tmp
  %mul.1 = mul nsw i32 %tmp3, %tmp2
  %mul.2 = mul nsw i32 %tmp5, %tmp4
  %mul.3 = mul nsw i32 %tmp7, %tmp6
  %mul.138 = mul nsw i32 %tmp9, %tmp8
  %mul.1.1 = mul nsw i32 %tmp11, %tmp10
  %mul.2.1 = mul nsw i32 %tmp13, %tmp12
  %mul.3.1 = mul nsw i32 %tmp15, %tmp14
  %mul.240 = mul nsw i32 %tmp17, %tmp16
  %mul.1.2 = mul nsw i32 %tmp19, %tmp18
  %mul.2.2 = mul nsw i32 %tmp21, %tmp20
  %mul.3.2 = mul nsw i32 %tmp23, %tmp22
  %mul.342 = mul nsw i32 %tmp25, %tmp24
  %mul.1.3 = mul nsw i32 %tmp27, %tmp26
  %mul.2.3 = mul nsw i32 %tmp29, %tmp28
  %mul.3.3 = mul nsw i32 %tmp31, %tmp30
  ; Four independent chains of three adds, one chain per output column; each
  ; chain sums that column's four products.
  %add.1 = add nsw i32 %mul.1, %mul
  %add.2 = add nsw i32 %mul.2, %add.1
  %add.3 = add nsw i32 %mul.3, %add.2
  %add.1.1 = add nsw i32 %mul.1.1, %mul.138
  %add.2.1 = add nsw i32 %mul.2.1, %add.1.1
  %add.3.1 = add nsw i32 %mul.3.1, %add.2.1
  %add.1.2 = add nsw i32 %mul.1.2, %mul.240
  %add.2.2 = add nsw i32 %mul.2.2, %add.1.2
  %add.3.2 = add nsw i32 %mul.3.2, %add.2.2
  %add.1.3 = add nsw i32 %mul.1.3, %mul.342
  %add.2.3 = add nsw i32 %mul.2.3, %add.1.3
  %add.3.3 = add nsw i32 %mul.3.3, %add.2.3
  ; Store the four sums to m3[i][0..3].
  %arrayidx16 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 0
  store i32 %add.3, i32* %arrayidx16, align 4, !tbaa !0
  %arrayidx16.1 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 1
  store i32 %add.3.1, i32* %arrayidx16.1, align 4, !tbaa !0
  %arrayidx16.2 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 2
  store i32 %add.3.2, i32* %arrayidx16.2, align 4, !tbaa !0
  %arrayidx16.3 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 3
  store i32 %add.3.3, i32* %arrayidx16.3, align 4, !tbaa !0
  ; Advance i; leave the loop once the i32-truncated next index equals 4.
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                        ; preds = %for.body
  ret void
}

; Type-based alias analysis (TBAA) descriptors. Every load and store in @mmult
; is tagged with !0 ("int"), which references !1 ("omnipotent char"), which in
; turn references !2 ("Simple C/C++ TBAA").
!0 = metadata !{metadata !"int", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA"}