Skip to content

Commit 045b5c1

Browse files
committed
cmd/internal/obj/loong64: change the plan9 format of the prefetch instruction PRELDX
Before:
    MOVV $n + $offset, Roff
    PRELDX (Rbase)(Roff), $hint
After:
    PRELDX offset(Rbase), $n, $hint

Support for this instruction was added in CL 671875, but it is not actually used.

Change-Id: I943d488ea6dc77781cd796ef480a89fede666bab
Reviewed-on: https://go-review.googlesource.com/c/go/+/673155
Reviewed-by: Meidan Li <[email protected]>
Reviewed-by: sophie zhao <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
1 parent 4b5a64f commit 045b5c1

File tree

5 files changed

+93
-19
lines changed

5 files changed

+93
-19
lines changed

src/cmd/asm/internal/asm/asm.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,11 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
676676
prog.From = a[0]
677677
prog.To = a[1]
678678
prog.RegTo2 = a[2].Reg
679+
680+
case arch.IsLoong64PRELD(op):
681+
prog.From = a[0]
682+
prog.AddRestSourceArgs([]obj.Addr{a[1], a[2]})
683+
679684
default:
680685
prog.From = a[0]
681686
prog.Reg = p.getRegister(prog, op, &a[1])

src/cmd/asm/internal/asm/testdata/loong64enc1.s

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,6 +1035,3 @@ lable2:
10351035
PRELD (R4), $0 // 8000c02a
10361036
PRELD -1(R4), $8 // 88fcff2a
10371037
PRELD 8(R4), $31 // 9f20c02a
1038-
PRELDX (R4)(R5), $0 // 80142c38
1039-
PRELDX (R4)(R6), $8 // 88182c38
1040-
PRELDX (R4)(R7), $31 // 9f1c2c38

src/cmd/asm/internal/asm/testdata/loong64enc5.s

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
1515
AND $0xfedcba9876543210, R4 // AND $-81985529216486896, R4 // 7ea8ec14de4388031e539717deb73f0384f81400
1616
AND $0xfedcba9876543210, R5, R4 // AND $-81985529216486896, R5, R4 // 7ea8ec14de4388031e539717deb73f03a4f81400
1717

18+
PRELDX 0(R7), $0x80001021, $0 // PRELDX (R7), $2147487777, $0 // 1e020014de0380031e000016de130003e0782c38
19+
PRELDX -1(R7), $0x1021, $2 // PRELDX -1(R7), $4129, $2 // fe030014deffbf031e000016de030003e2782c38
20+
PRELDX 8(R7), $0x80100800, $31 // PRELDX 8(R7), $2148534272, $31 // 1ee00714de238003fe1f0016de130003ff782c38
21+
PRELDX 16(R7), $0x202040, $1 // PRELDX 16(R7), $2105408, $1 // 1e200014de4380033e000216de030003e1782c38
22+

src/cmd/internal/obj/loong64/asm.go

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -416,8 +416,8 @@ var optab = []Optab{
416416

417417
{AVMOVQ, C_ELEM, C_NONE, C_NONE, C_ARNG, C_NONE, 45, 4, 0, 0},
418418

419-
{APRELD, C_SOREG, C_NONE, C_U5CON, C_NONE, C_NONE, 46, 4, 0, 0},
420-
{APRELDX, C_ROFF, C_NONE, C_U5CON, C_NONE, C_NONE, 47, 4, 0, 0},
419+
{APRELD, C_SOREG, C_U5CON, C_NONE, C_NONE, C_NONE, 46, 4, 0, 0},
420+
{APRELDX, C_SOREG, C_DCON, C_U5CON, C_NONE, C_NONE, 47, 20, 0, 0},
421421

422422
{obj.APCALIGN, C_U12CON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
423423
{obj.APCDATA, C_32CON, C_NONE, C_NONE, C_32CON, C_NONE, 0, 0, 0, 0},
@@ -1105,6 +1105,22 @@ func (c *ctxt0) oplook(p *obj.Prog) *Optab {
11051105
c.ctxt.Diag("loong64 ops not initialized, call loong64.buildop first")
11061106
}
11071107

1108+
restArgsIndex := 0
1109+
restArgsLen := len(p.RestArgs)
1110+
if restArgsLen > 2 {
1111+
c.ctxt.Diag("too many RestArgs: got %v, maximum is 2\n", restArgsLen)
1112+
return nil
1113+
}
1114+
1115+
restArgsv := [2]int{C_NONE + 1, C_NONE + 1}
1116+
for i, ap := range p.RestArgs {
1117+
restArgsv[i] = int(ap.Addr.Class)
1118+
if restArgsv[i] == 0 {
1119+
restArgsv[i] = c.aclass(&ap.Addr) + 1
1120+
ap.Addr.Class = int8(restArgsv[i])
1121+
}
1122+
}
1123+
11081124
a1 := int(p.Optab)
11091125
if a1 != 0 {
11101126
return &optab[a1-1]
@@ -1130,6 +1146,9 @@ func (c *ctxt0) oplook(p *obj.Prog) *Optab {
11301146
a2 := C_NONE
11311147
if p.Reg != 0 {
11321148
a2 = c.rclass(p.Reg)
1149+
} else if restArgsLen > 0 {
1150+
a2 = restArgsv[restArgsIndex] - 1
1151+
restArgsIndex++
11331152
}
11341153

11351154
// 2nd destination operand
@@ -1140,22 +1159,20 @@ func (c *ctxt0) oplook(p *obj.Prog) *Optab {
11401159

11411160
// 3rd source operand
11421161
a3 := C_NONE
1143-
if len(p.RestArgs) > 0 {
1144-
a3 = int(p.RestArgs[0].Class)
1145-
if a3 == 0 {
1146-
a3 = c.aclass(&p.RestArgs[0].Addr) + 1
1147-
p.RestArgs[0].Class = int8(a3)
1148-
}
1149-
a3--
1162+
if restArgsLen > 0 && restArgsIndex < restArgsLen {
1163+
a3 = restArgsv[restArgsIndex] - 1
1164+
restArgsIndex++
11501165
}
11511166

11521167
ops := oprange[p.As&obj.AMask]
11531168
c1 := &xcmp[a1]
1169+
c2 := &xcmp[a2]
11541170
c3 := &xcmp[a3]
11551171
c4 := &xcmp[a4]
1172+
c5 := &xcmp[a5]
11561173
for i := range ops {
11571174
op := &ops[i]
1158-
if (int(op.reg) == a2) && c3[op.from3] && c1[op.from1] && c4[op.to1] && (int(op.to2) == a5) {
1175+
if c1[op.from1] && c2[op.reg] && c3[op.from3] && c4[op.to1] && c5[op.to2] {
11591176
p.Optab = uint16(cap(optab) - cap(ops) + i + 1)
11601177
return op
11611178
}
@@ -2457,16 +2474,40 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
24572474
c.checkindex(p, index, m)
24582475
o1 = v | (index << 10) | (vj << 5) | vd
24592476

2460-
case 46:
2461-
// preld offset(Rbase), hint
2477+
case 46: // preld offset(Rbase), $hint
24622478
offs := c.regoff(&p.From)
24632479
hint := p.GetFrom3().Offset
24642480
o1 = OP_12IR_5I(c.opiir(p.As), uint32(offs), uint32(p.From.Reg), uint32(hint))
24652481

2466-
case 47:
2467-
// preldx (Rbase)(Roff), hint
2468-
hint := p.GetFrom3().Offset
2469-
o1 = OP_5IRR(c.opirr(p.As), uint32(p.From.Index), uint32(p.From.Reg), uint32(hint))
2482+
case 47: // preldx offset(Rbase), $n, $hint
2483+
offs := c.regoff(&p.From)
2484+
hint := p.RestArgs[1].Offset
2485+
n := uint64(p.GetFrom3().Offset)
2486+
2487+
addrSeq := (n >> 0) & 0x1
2488+
blkSize := (n >> 1) & 0x7ff
2489+
blkNums := (n >> 12) & 0x1ff
2490+
stride := (n >> 21) & 0xffff
2491+
2492+
if blkSize > 1024 {
2493+
c.ctxt.Diag("%v: block_size amount out of range[16, 1024]: %v\n", p, blkSize)
2494+
}
2495+
2496+
if blkNums > 256 {
2497+
c.ctxt.Diag("%v: block_nums amount out of range[1, 256]: %v\n", p, blkSize)
2498+
}
2499+
2500+
v := (uint64(offs) & 0xffff)
2501+
v += addrSeq << 16
2502+
v += ((blkSize / 16) - 1) << 20
2503+
v += (blkNums - 1) << 32
2504+
v += stride << 44
2505+
2506+
o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
2507+
o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
2508+
o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
2509+
o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
2510+
o5 = OP_5IRR(c.opirr(p.As), uint32(REGTMP), uint32(p.From.Reg), uint32(hint))
24702511

24712512
case 49:
24722513
if p.As == ANOOP {

src/cmd/internal/obj/loong64/doc.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,5 +226,31 @@ Note: In the following sections 3.1 to 3.6, "ui4" (4-bit unsigned int immediate)
226226
- When using the AM*_.W[U]/D[U] instruction, registers rd and rj cannot be the same,
227227
otherwise an exception is triggered, and rd and rk cannot be the same, otherwise
228228
the execution result is uncertain.
229+
230+
3. Prefetch instructions
231+
Instruction format:
232+
PRELD offset(Rbase), $hint
233+
PRELDX offset(Rbase), $n, $hint
234+
235+
Mapping between Go and platform assembly:
236+
Go assembly | platform assembly
237+
PRELD offset(Rbase), $hint | preld hint, Rbase, offset
238+
PRELDX offset(Rbase), $n, $hint | move rk, $x; preldx hint, Rbase, rk
239+
240+
Note: $x is the value obtained by repacking $n and offset into the register operand rk of preldx
241+
242+
Definition of hint value:
243+
0: load to L1
244+
2: load to L3
245+
8: store to L1
246+
247+
The meaning of the remaining hint values is not yet defined; the processor executes them as NOPs
248+
249+
Definition of $n in the PRELDX instruction:
250+
bit[0]: address sequence, 0 indicating ascending and 1 indicating descending
251+
bits[11:1]: block size, the value range is [16, 1024], and it must be an integer multiple of 16
252+
bits[20:12]: block num, the value range is [1, 256]
253+
bits[36:21]: stride, the value range is [0, 0xffff]
229254
*/
255+
230256
package loong64

0 commit comments

Comments
 (0)