Skip to content

Commit 891a2c3

Browse files
authored
[AArch64] Change IssueWidth to 6 in AArch64SchedNeoverseV2.td (llvm#142565)
I think that the issue width for Neoverse V2 CPUs is set too high and does not properly reflect the dispatch constraints. I tested various values of IssueWidth (16, 8 and 6) with runs of SPEC2017 on a Neoverse V2 machine, and I got the highest overall geomean score with an issue width of 6, although it is only a marginal 0.14% improvement. I also observed a 1-2% improvement when testing the Gromacs application with some workloads.

Here are the notable changes in SPEC2017 ref runtimes, i.e. those with a change of roughly 0.5% or greater ('-' means faster):

548.exchange2: -1.7%
510.parest: -0.78%
538.imagick: -0.73%
500.perlbench: -0.57%
525.x264: -0.55%
507.cactuBSSN: -0.5%
520.omnetpp: -0.48%
511.povray: +0.57%
544.nab: +0.65%
503.bwaves: +0.68%
526.blender: +0.75%

If this patch causes any major regressions post-commit, it can easily be reverted, but I think it should be an overall improvement.
1 parent 8631cdd commit 891a2c3

File tree

7 files changed

+1413
-1409
lines changed

7 files changed

+1413
-1409
lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
//===----------------------------------------------------------------------===//
1515

1616
def NeoverseV2Model : SchedMachineModel {
17-
let IssueWidth = 16; // Micro-ops dispatched at a time.
17+
let IssueWidth = 6; // This value comes from the decode bandwidth
18+
// and empirical measurements showed that a
19+
// lower value is better.
1820
let MicroOpBufferSize = 320; // Entries in micro-op re-order buffer.
1921
let LoadLatency = 4; // Optimistic load latency.
2022
let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2.

llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2536,14 +2536,14 @@ drps
25362536
# CHECK-NEXT: 1 2 0.50 bics x3, xzr, x3, lsl #1
25372537
# CHECK-NEXT: 1 2 0.50 tst w3, w7, lsl #31
25382538
# CHECK-NEXT: 1 2 0.50 tst x2, x20, asr #2
2539-
# CHECK-NEXT: 1 0 0.06 mov x3, x6
2540-
# CHECK-NEXT: 1 0 0.06 mov x3, xzr
2541-
# CHECK-NEXT: 1 0 0.06 mov wzr, w2
2542-
# CHECK-NEXT: 1 0 0.06 mov w3, w5
2539+
# CHECK-NEXT: 1 0 0.17 mov x3, x6
2540+
# CHECK-NEXT: 1 0 0.17 mov x3, xzr
2541+
# CHECK-NEXT: 1 0 0.17 mov wzr, w2
2542+
# CHECK-NEXT: 1 0 0.17 mov w3, w5
25432543
# CHECK-NEXT: 1 1 0.17 movz w2, #0, lsl #16
25442544
# CHECK-NEXT: 1 1 0.17 mov w2, #-1235
25452545
# CHECK-NEXT: 1 1 0.17 mov x2, #5299989643264
2546-
# CHECK-NEXT: 1 0 0.06 mov x2, #0
2546+
# CHECK-NEXT: 1 0 0.17 mov x2, #0
25472547
# CHECK-NEXT: 1 1 0.17 movk w3, #0
25482548
# CHECK-NEXT: 1 1 0.17 movz x4, #0, lsl #16
25492549
# CHECK-NEXT: 1 1 0.17 movk w5, #0, lsl #16

llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-clear-upper-regs.s

Lines changed: 62 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ add z0.s, z0.s, z0.s
5858
# CHECK-NEXT: Total Cycles: 44
5959
# CHECK-NEXT: Total uOps: 200
6060

61-
# CHECK: Dispatch Width: 16
61+
# CHECK: Dispatch Width: 6
6262
# CHECK-NEXT: uOps Per Cycle: 4.55
6363
# CHECK-NEXT: IPC: 4.55
6464
# CHECK-NEXT: Block RThroughput: 0.3
@@ -116,8 +116,8 @@ add z0.s, z0.s, z0.s
116116
# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
117117
# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr b0, [sp]
118118
# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
119-
# CHECK-NEXT: [3,0] D=eeeeeeE-R. ldr b0, [sp]
120-
# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
119+
# CHECK-NEXT: [3,0] .DeeeeeeE-R. ldr b0, [sp]
120+
# CHECK-NEXT: [3,1] .D======eeER add z0.d, z0.d, z0.d
121121

122122
# CHECK: Average Wait times (based on the timeline view):
123123
# CHECK-NEXT: [0]: Executions
@@ -126,9 +126,9 @@ add z0.s, z0.s, z0.s
126126
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
127127

128128
# CHECK: [0] [1] [2] [3]
129-
# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ldr b0, [sp]
130-
# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
131-
# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
129+
# CHECK-NEXT: 0. 4 1.0 1.0 1.3 ldr b0, [sp]
130+
# CHECK-NEXT: 1. 4 7.0 0.0 0.0 add z0.d, z0.d, z0.d
131+
# CHECK-NEXT: 4 4.0 0.5 0.6 <total>
132132

133133
# CHECK: [1] Code Region - FPR16-bit
134134

@@ -137,7 +137,7 @@ add z0.s, z0.s, z0.s
137137
# CHECK-NEXT: Total Cycles: 44
138138
# CHECK-NEXT: Total uOps: 200
139139

140-
# CHECK: Dispatch Width: 16
140+
# CHECK: Dispatch Width: 6
141141
# CHECK-NEXT: uOps Per Cycle: 4.55
142142
# CHECK-NEXT: IPC: 4.55
143143
# CHECK-NEXT: Block RThroughput: 0.3
@@ -195,8 +195,8 @@ add z0.s, z0.s, z0.s
195195
# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
196196
# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr h0, [sp]
197197
# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
198-
# CHECK-NEXT: [3,0] D=eeeeeeE-R. ldr h0, [sp]
199-
# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
198+
# CHECK-NEXT: [3,0] .DeeeeeeE-R. ldr h0, [sp]
199+
# CHECK-NEXT: [3,1] .D======eeER add z0.d, z0.d, z0.d
200200

201201
# CHECK: Average Wait times (based on the timeline view):
202202
# CHECK-NEXT: [0]: Executions
@@ -205,9 +205,9 @@ add z0.s, z0.s, z0.s
205205
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
206206

207207
# CHECK: [0] [1] [2] [3]
208-
# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ldr h0, [sp]
209-
# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
210-
# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
208+
# CHECK-NEXT: 0. 4 1.0 1.0 1.3 ldr h0, [sp]
209+
# CHECK-NEXT: 1. 4 7.0 0.0 0.0 add z0.d, z0.d, z0.d
210+
# CHECK-NEXT: 4 4.0 0.5 0.6 <total>
211211

212212
# CHECK: [2] Code Region - FPR32-bit
213213

@@ -216,7 +216,7 @@ add z0.s, z0.s, z0.s
216216
# CHECK-NEXT: Total Cycles: 44
217217
# CHECK-NEXT: Total uOps: 200
218218

219-
# CHECK: Dispatch Width: 16
219+
# CHECK: Dispatch Width: 6
220220
# CHECK-NEXT: uOps Per Cycle: 4.55
221221
# CHECK-NEXT: IPC: 4.55
222222
# CHECK-NEXT: Block RThroughput: 0.3
@@ -274,8 +274,8 @@ add z0.s, z0.s, z0.s
274274
# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
275275
# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr s0, [sp]
276276
# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
277-
# CHECK-NEXT: [3,0] D=eeeeeeE-R. ldr s0, [sp]
278-
# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
277+
# CHECK-NEXT: [3,0] .DeeeeeeE-R. ldr s0, [sp]
278+
# CHECK-NEXT: [3,1] .D======eeER add z0.d, z0.d, z0.d
279279

280280
# CHECK: Average Wait times (based on the timeline view):
281281
# CHECK-NEXT: [0]: Executions
@@ -284,9 +284,9 @@ add z0.s, z0.s, z0.s
284284
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
285285

286286
# CHECK: [0] [1] [2] [3]
287-
# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ldr s0, [sp]
288-
# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
289-
# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
287+
# CHECK-NEXT: 0. 4 1.0 1.0 1.3 ldr s0, [sp]
288+
# CHECK-NEXT: 1. 4 7.0 0.0 0.0 add z0.d, z0.d, z0.d
289+
# CHECK-NEXT: 4 4.0 0.5 0.6 <total>
290290

291291
# CHECK: [3] Code Region - FPR64-bit
292292

@@ -295,7 +295,7 @@ add z0.s, z0.s, z0.s
295295
# CHECK-NEXT: Total Cycles: 44
296296
# CHECK-NEXT: Total uOps: 200
297297

298-
# CHECK: Dispatch Width: 16
298+
# CHECK: Dispatch Width: 6
299299
# CHECK-NEXT: uOps Per Cycle: 4.55
300300
# CHECK-NEXT: IPC: 4.55
301301
# CHECK-NEXT: Block RThroughput: 0.3
@@ -353,8 +353,8 @@ add z0.s, z0.s, z0.s
353353
# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
354354
# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr d0, [sp]
355355
# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
356-
# CHECK-NEXT: [3,0] D=eeeeeeE-R. ldr d0, [sp]
357-
# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
356+
# CHECK-NEXT: [3,0] .DeeeeeeE-R. ldr d0, [sp]
357+
# CHECK-NEXT: [3,1] .D======eeER add z0.d, z0.d, z0.d
358358

359359
# CHECK: Average Wait times (based on the timeline view):
360360
# CHECK-NEXT: [0]: Executions
@@ -363,9 +363,9 @@ add z0.s, z0.s, z0.s
363363
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
364364

365365
# CHECK: [0] [1] [2] [3]
366-
# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ldr d0, [sp]
367-
# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
368-
# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
366+
# CHECK-NEXT: 0. 4 1.0 1.0 1.3 ldr d0, [sp]
367+
# CHECK-NEXT: 1. 4 7.0 0.0 0.0 add z0.d, z0.d, z0.d
368+
# CHECK-NEXT: 4 4.0 0.5 0.6 <total>
369369

370370
# CHECK: [4] Code Region - FPR128-bit
371371

@@ -374,7 +374,7 @@ add z0.s, z0.s, z0.s
374374
# CHECK-NEXT: Total Cycles: 44
375375
# CHECK-NEXT: Total uOps: 200
376376

377-
# CHECK: Dispatch Width: 16
377+
# CHECK: Dispatch Width: 6
378378
# CHECK-NEXT: uOps Per Cycle: 4.55
379379
# CHECK-NEXT: IPC: 4.55
380380
# CHECK-NEXT: Block RThroughput: 0.3
@@ -432,8 +432,8 @@ add z0.s, z0.s, z0.s
432432
# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
433433
# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr q0, [sp]
434434
# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
435-
# CHECK-NEXT: [3,0] D=eeeeeeE-R. ldr q0, [sp]
436-
# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
435+
# CHECK-NEXT: [3,0] .DeeeeeeE-R. ldr q0, [sp]
436+
# CHECK-NEXT: [3,1] .D======eeER add z0.d, z0.d, z0.d
437437

438438
# CHECK: Average Wait times (based on the timeline view):
439439
# CHECK-NEXT: [0]: Executions
@@ -442,9 +442,9 @@ add z0.s, z0.s, z0.s
442442
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
443443

444444
# CHECK: [0] [1] [2] [3]
445-
# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ldr q0, [sp]
446-
# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
447-
# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
445+
# CHECK-NEXT: 0. 4 1.0 1.0 1.3 ldr q0, [sp]
446+
# CHECK-NEXT: 1. 4 7.0 0.0 0.0 add z0.d, z0.d, z0.d
447+
# CHECK-NEXT: 4 4.0 0.5 0.6 <total>
448448

449449
# CHECK: [5] Code Region - SIMD64-bit-b
450450

@@ -453,7 +453,7 @@ add z0.s, z0.s, z0.s
453453
# CHECK-NEXT: Total Cycles: 44
454454
# CHECK-NEXT: Total uOps: 200
455455

456-
# CHECK: Dispatch Width: 16
456+
# CHECK: Dispatch Width: 6
457457
# CHECK-NEXT: uOps Per Cycle: 4.55
458458
# CHECK-NEXT: IPC: 4.55
459459
# CHECK-NEXT: Block RThroughput: 0.3
@@ -511,8 +511,8 @@ add z0.s, z0.s, z0.s
511511
# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
512512
# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.8b }, [sp]
513513
# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
514-
# CHECK-NEXT: [3,0] D=eeeeeeE-R. ld1 { v0.8b }, [sp]
515-
# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
514+
# CHECK-NEXT: [3,0] .DeeeeeeE-R. ld1 { v0.8b }, [sp]
515+
# CHECK-NEXT: [3,1] .D======eeER add z0.d, z0.d, z0.d
516516

517517
# CHECK: Average Wait times (based on the timeline view):
518518
# CHECK-NEXT: [0]: Executions
@@ -521,9 +521,9 @@ add z0.s, z0.s, z0.s
521521
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
522522

523523
# CHECK: [0] [1] [2] [3]
524-
# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ld1 { v0.8b }, [sp]
525-
# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
526-
# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
524+
# CHECK-NEXT: 0. 4 1.0 1.0 1.3 ld1 { v0.8b }, [sp]
525+
# CHECK-NEXT: 1. 4 7.0 0.0 0.0 add z0.d, z0.d, z0.d
526+
# CHECK-NEXT: 4 4.0 0.5 0.6 <total>
527527

528528
# CHECK: [6] Code Region - SIMD64-bit-h
529529

@@ -532,7 +532,7 @@ add z0.s, z0.s, z0.s
532532
# CHECK-NEXT: Total Cycles: 44
533533
# CHECK-NEXT: Total uOps: 200
534534

535-
# CHECK: Dispatch Width: 16
535+
# CHECK: Dispatch Width: 6
536536
# CHECK-NEXT: uOps Per Cycle: 4.55
537537
# CHECK-NEXT: IPC: 4.55
538538
# CHECK-NEXT: Block RThroughput: 0.3
@@ -590,8 +590,8 @@ add z0.s, z0.s, z0.s
590590
# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
591591
# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.4h }, [sp]
592592
# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
593-
# CHECK-NEXT: [3,0] D=eeeeeeE-R. ld1 { v0.4h }, [sp]
594-
# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
593+
# CHECK-NEXT: [3,0] .DeeeeeeE-R. ld1 { v0.4h }, [sp]
594+
# CHECK-NEXT: [3,1] .D======eeER add z0.d, z0.d, z0.d
595595

596596
# CHECK: Average Wait times (based on the timeline view):
597597
# CHECK-NEXT: [0]: Executions
@@ -600,9 +600,9 @@ add z0.s, z0.s, z0.s
600600
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
601601

602602
# CHECK: [0] [1] [2] [3]
603-
# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ld1 { v0.4h }, [sp]
604-
# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
605-
# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
603+
# CHECK-NEXT: 0. 4 1.0 1.0 1.3 ld1 { v0.4h }, [sp]
604+
# CHECK-NEXT: 1. 4 7.0 0.0 0.0 add z0.d, z0.d, z0.d
605+
# CHECK-NEXT: 4 4.0 0.5 0.6 <total>
606606

607607
# CHECK: [7] Code Region - SIMD64-bit-s
608608

@@ -611,7 +611,7 @@ add z0.s, z0.s, z0.s
611611
# CHECK-NEXT: Total Cycles: 44
612612
# CHECK-NEXT: Total uOps: 200
613613

614-
# CHECK: Dispatch Width: 16
614+
# CHECK: Dispatch Width: 6
615615
# CHECK-NEXT: uOps Per Cycle: 4.55
616616
# CHECK-NEXT: IPC: 4.55
617617
# CHECK-NEXT: Block RThroughput: 0.3
@@ -669,8 +669,8 @@ add z0.s, z0.s, z0.s
669669
# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
670670
# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.2s }, [sp]
671671
# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
672-
# CHECK-NEXT: [3,0] D=eeeeeeE-R. ld1 { v0.2s }, [sp]
673-
# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
672+
# CHECK-NEXT: [3,0] .DeeeeeeE-R. ld1 { v0.2s }, [sp]
673+
# CHECK-NEXT: [3,1] .D======eeER add z0.d, z0.d, z0.d
674674

675675
# CHECK: Average Wait times (based on the timeline view):
676676
# CHECK-NEXT: [0]: Executions
@@ -679,9 +679,9 @@ add z0.s, z0.s, z0.s
679679
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
680680

681681
# CHECK: [0] [1] [2] [3]
682-
# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ld1 { v0.2s }, [sp]
683-
# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
684-
# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
682+
# CHECK-NEXT: 0. 4 1.0 1.0 1.3 ld1 { v0.2s }, [sp]
683+
# CHECK-NEXT: 1. 4 7.0 0.0 0.0 add z0.d, z0.d, z0.d
684+
# CHECK-NEXT: 4 4.0 0.5 0.6 <total>
685685

686686
# CHECK: [8] Code Region - SIMD64-bit-d
687687

@@ -690,7 +690,7 @@ add z0.s, z0.s, z0.s
690690
# CHECK-NEXT: Total Cycles: 44
691691
# CHECK-NEXT: Total uOps: 200
692692

693-
# CHECK: Dispatch Width: 16
693+
# CHECK: Dispatch Width: 6
694694
# CHECK-NEXT: uOps Per Cycle: 4.55
695695
# CHECK-NEXT: IPC: 4.55
696696
# CHECK-NEXT: Block RThroughput: 0.3
@@ -748,8 +748,8 @@ add z0.s, z0.s, z0.s
748748
# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
749749
# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.1d }, [sp]
750750
# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
751-
# CHECK-NEXT: [3,0] D=eeeeeeE-R. ld1 { v0.1d }, [sp]
752-
# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
751+
# CHECK-NEXT: [3,0] .DeeeeeeE-R. ld1 { v0.1d }, [sp]
752+
# CHECK-NEXT: [3,1] .D======eeER add z0.d, z0.d, z0.d
753753

754754
# CHECK: Average Wait times (based on the timeline view):
755755
# CHECK-NEXT: [0]: Executions
@@ -758,9 +758,9 @@ add z0.s, z0.s, z0.s
758758
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
759759

760760
# CHECK: [0] [1] [2] [3]
761-
# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ld1 { v0.1d }, [sp]
762-
# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
763-
# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
761+
# CHECK-NEXT: 0. 4 1.0 1.0 1.3 ld1 { v0.1d }, [sp]
762+
# CHECK-NEXT: 1. 4 7.0 0.0 0.0 add z0.d, z0.d, z0.d
763+
# CHECK-NEXT: 4 4.0 0.5 0.6 <total>
764764

765765
# CHECK: [9] Code Region - insr
766766

@@ -769,7 +769,7 @@ add z0.s, z0.s, z0.s
769769
# CHECK-NEXT: Total Cycles: 803
770770
# CHECK-NEXT: Total uOps: 300
771771

772-
# CHECK: Dispatch Width: 16
772+
# CHECK: Dispatch Width: 6
773773
# CHECK-NEXT: uOps Per Cycle: 0.37
774774
# CHECK-NEXT: IPC: 0.25
775775
# CHECK-NEXT: Block RThroughput: 1.0
@@ -825,10 +825,10 @@ add z0.s, z0.s, z0.s
825825
# CHECK-NEXT: [0,1] D======eeER . . . . . add z0.s, z0.s, z0.s
826826
# CHECK-NEXT: [1,0] D========eeeeeeER . . . . insr z0.s, w0
827827
# CHECK-NEXT: [1,1] D==============eeER . . . . add z0.s, z0.s, z0.s
828-
# CHECK-NEXT: [2,0] D================eeeeeeER. . . insr z0.s, w0
829-
# CHECK-NEXT: [2,1] D======================eeER . . add z0.s, z0.s, z0.s
830-
# CHECK-NEXT: [3,0] D========================eeeeeeER . insr z0.s, w0
831-
# CHECK-NEXT: [3,1] D==============================eeER add z0.s, z0.s, z0.s
828+
# CHECK-NEXT: [2,0] .D===============eeeeeeER. . . insr z0.s, w0
829+
# CHECK-NEXT: [2,1] .D=====================eeER . . add z0.s, z0.s, z0.s
830+
# CHECK-NEXT: [3,0] .D=======================eeeeeeER . insr z0.s, w0
831+
# CHECK-NEXT: [3,1] .D=============================eeER add z0.s, z0.s, z0.s
832832

833833
# CHECK: Average Wait times (based on the timeline view):
834834
# CHECK-NEXT: [0]: Executions
@@ -837,6 +837,6 @@ add z0.s, z0.s, z0.s
837837
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
838838

839839
# CHECK: [0] [1] [2] [3]
840-
# CHECK-NEXT: 0. 4 13.0 0.3 0.0 insr z0.s, w0
841-
# CHECK-NEXT: 1. 4 19.0 0.0 0.0 add z0.s, z0.s, z0.s
842-
# CHECK-NEXT: 4 16.0 0.1 0.0 <total>
840+
# CHECK-NEXT: 0. 4 12.5 0.3 0.0 insr z0.s, w0
841+
# CHECK-NEXT: 1. 4 18.5 0.0 0.0 add z0.s, z0.s, z0.s
842+
# CHECK-NEXT: 4 15.5 0.1 0.0 <total>

0 commit comments

Comments
 (0)