@@ -108,22 +108,21 @@ define void @matrix_mul_signed(i32 %N, ptr nocapture %C, ptr nocapture readonly
108
108
;
109
109
; CHECK-GI-LABEL: matrix_mul_signed:
110
110
; CHECK-GI: // %bb.0: // %vector.header
111
- ; CHECK-GI-NEXT: sxth w9, w3
111
+ ; CHECK-GI-NEXT: sxth w8, w3
112
112
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
113
+ ; CHECK-GI-NEXT: dup v0.4s, w8
113
114
; CHECK-GI-NEXT: sxtw x8, w0
114
- ; CHECK-GI-NEXT: dup v0.4s, w9
115
115
; CHECK-GI-NEXT: and x8, x8, #0xfffffff8
116
+ ; CHECK-GI-NEXT: xtn v0.4h, v0.4s
116
117
; CHECK-GI-NEXT: .LBB1_1: // %vector.body
117
118
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
118
119
; CHECK-GI-NEXT: add x9, x2, w0, sxtw #1
119
120
; CHECK-GI-NEXT: subs x8, x8, #8
120
121
; CHECK-GI-NEXT: ldp d1, d2, [x9]
121
122
; CHECK-GI-NEXT: add x9, x1, w0, sxtw #2
122
123
; CHECK-GI-NEXT: add w0, w0, #8
123
- ; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
124
- ; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0
125
- ; CHECK-GI-NEXT: mul v1.4s, v0.4s, v1.4s
126
- ; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
124
+ ; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h
125
+ ; CHECK-GI-NEXT: smull v2.4s, v0.4h, v2.4h
127
126
; CHECK-GI-NEXT: stp q1, q2, [x9]
128
127
; CHECK-GI-NEXT: b.ne .LBB1_1
129
128
; CHECK-GI-NEXT: // %bb.2: // %for.end12
@@ -305,40 +304,39 @@ define void @larger_smull(ptr nocapture noundef readonly %x, i16 noundef %y, ptr
305
304
; CHECK-GI-NEXT: b.le .LBB3_7
306
305
; CHECK-GI-NEXT: // %bb.1: // %for.body.preheader
307
306
; CHECK-GI-NEXT: sxth w8, w1
308
- ; CHECK-GI-NEXT: mov x9, xzr
307
+ ; CHECK-GI-NEXT: mov x10, xzr
309
308
; CHECK-GI-NEXT: cmp w3, #16
310
- ; CHECK-GI-NEXT: mov w10, w3
309
+ ; CHECK-GI-NEXT: mov w9, w3
311
310
; CHECK-GI-NEXT: b.lo .LBB3_5
312
311
; CHECK-GI-NEXT: // %bb.2: // %vector.ph
313
312
; CHECK-GI-NEXT: dup v0.4s, w8
314
- ; CHECK-GI-NEXT: and x9, x10, #0xfffffff0
313
+ ; CHECK-GI-NEXT: and x10, x9, #0xfffffff0
315
314
; CHECK-GI-NEXT: add x11, x2, #32
316
315
; CHECK-GI-NEXT: add x12, x0, #16
317
- ; CHECK-GI-NEXT: mov x13, x9
316
+ ; CHECK-GI-NEXT: mov x13, x10
317
+ ; CHECK-GI-NEXT: xtn v0.4h, v0.4s
318
318
; CHECK-GI-NEXT: .LBB3_3: // %vector.body
319
319
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
320
320
; CHECK-GI-NEXT: ldp q1, q2, [x12, #-16]
321
321
; CHECK-GI-NEXT: mov x14, x11
322
322
; CHECK-GI-NEXT: subs x13, x13, #16
323
323
; CHECK-GI-NEXT: add x12, x12, #32
324
- ; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0
325
- ; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
326
- ; CHECK-GI-NEXT: sshll v4.4s, v2.4h, #0
327
- ; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0
328
- ; CHECK-GI-NEXT: mul v3.4s, v0.4s, v3.4s
329
- ; CHECK-GI-NEXT: mul v1.4s, v0.4s, v1.4s
330
- ; CHECK-GI-NEXT: mul v4.4s, v0.4s, v4.4s
331
- ; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
332
- ; CHECK-GI-NEXT: stp q3, q1, [x14, #-32]!
333
- ; CHECK-GI-NEXT: stp q4, q2, [x11], #64
324
+ ; CHECK-GI-NEXT: mov d3, v1.d[1]
325
+ ; CHECK-GI-NEXT: mov d4, v2.d[1]
326
+ ; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h
327
+ ; CHECK-GI-NEXT: smull v2.4s, v0.4h, v2.4h
328
+ ; CHECK-GI-NEXT: smull v3.4s, v0.4h, v3.4h
329
+ ; CHECK-GI-NEXT: smull v4.4s, v0.4h, v4.4h
330
+ ; CHECK-GI-NEXT: stp q1, q3, [x14, #-32]!
331
+ ; CHECK-GI-NEXT: stp q2, q4, [x11], #64
334
332
; CHECK-GI-NEXT: b.ne .LBB3_3
335
333
; CHECK-GI-NEXT: // %bb.4: // %middle.block
336
- ; CHECK-GI-NEXT: cmp x9, x10
334
+ ; CHECK-GI-NEXT: cmp x10, x9
337
335
; CHECK-GI-NEXT: b.eq .LBB3_7
338
336
; CHECK-GI-NEXT: .LBB3_5: // %for.body.preheader1
339
- ; CHECK-GI-NEXT: add x11, x2, x9, lsl #2
340
- ; CHECK-GI-NEXT: add x12, x0, x9, lsl #1
341
- ; CHECK-GI-NEXT: sub x9, x10, x9
337
+ ; CHECK-GI-NEXT: add x11, x2, x10, lsl #2
338
+ ; CHECK-GI-NEXT: add x12, x0, x10, lsl #1
339
+ ; CHECK-GI-NEXT: sub x9, x9, x10
342
340
; CHECK-GI-NEXT: .LBB3_6: // %for.body
343
341
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
344
342
; CHECK-GI-NEXT: ldrsh w10, [x12], #2
0 commit comments