Skip to content

Commit d324f21

Browse files
committed
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out. On a 4-core Lenovo X201s (Linux): 31.12u 0.60s 31.74r 1 cpu, no atomics 32.27u 0.58s 32.86r 1 cpu, atomic instructions 33.04u 0.83s 27.47r 2 cpu On a 16-core Xeon (Linux): 33.08u 0.65s 33.80r 1 cpu, no atomics 34.87u 1.12s 29.60r 2 cpu 36.00u 1.87s 28.43r 3 cpu 36.46u 2.34s 27.10r 4 cpu 38.28u 3.85s 26.92r 5 cpu 37.72u 5.25s 26.73r 6 cpu 39.63u 7.11s 26.95r 7 cpu 39.67u 8.10s 26.68r 8 cpu On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5): 39.43u 1.45s 41.27r 1 cpu, no atomics 43.98u 2.95s 38.69r 2 cpu On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1): 48.81u 2.12s 51.76r 1 cpu, no atomics 57.15u 4.72s 51.54r 2 cpu The handoff algorithm is really only good for two cores. Beyond that we will need to do something more sophisticated, like have each core hand off to the next one, around a circle. Even so, the code is a good checkpoint; for now we'll limit the number of gc procs to at most 2. R=dvyukov CC=golang-dev https://golang.org/cl/4641082
1 parent b0cddb9 commit d324f21

File tree

15 files changed

+709
-169
lines changed

15 files changed

+709
-169
lines changed

src/pkg/runtime/darwin/386/sys.s

+31-2
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ TEXT runtime·sigtramp(SB),7,$40
9797
// save g
9898
MOVL g(CX), DI
9999
MOVL DI, 20(SP)
100-
100+
101101
// g = m->gsignal
102102
MOVL m(CX), BP
103103
MOVL m_gsignal(BP), BP
@@ -111,7 +111,7 @@ TEXT runtime·sigtramp(SB),7,$40
111111
MOVL context+16(FP), BX
112112
MOVL BX, 8(SP)
113113
MOVL DI, 12(SP)
114-
114+
115115
MOVL handler+0(FP), BX
116116
CALL BX
117117

@@ -138,6 +138,26 @@ TEXT runtime·sigaltstack(SB),7,$0
138138
CALL runtime·notok(SB)
139139
RET
140140

141+
// usleep(usec): splits usec into a seconds/microseconds pair stored in the
// frame, then issues the select call described below with that pair as the
// only non-zero argument.
TEXT runtime·usleep(SB),7,$32
142+
MOVL $0, DX // zero the high half of the DX:AX dividend
143+
MOVL usec+0(FP), AX
144+
MOVL $1000000, CX
145+
DIVL CX // AX = usec / 1000000, DX = usec % 1000000
146+
MOVL AX, 24(SP) // sec
147+
MOVL DX, 28(SP) // usec
148+
149+
// select(0, 0, 0, 0, &tv)
150+
MOVL $0, 0(SP) // "return PC" - ignored
151+
MOVL $0, 4(SP) // four zero arguments at 4..16(SP)
152+
MOVL $0, 8(SP)
153+
MOVL $0, 12(SP)
154+
MOVL $0, 16(SP)
155+
LEAL 24(SP), AX // address of the sec/usec pair built above
156+
MOVL AX, 20(SP) // fifth argument: &tv
157+
MOVL $93, AX // call number for the select named in the comment above
158+
INT $0x80
159+
RET
160+
141161
// void bsdthread_create(void *stk, M *m, G *g, void (*fn)(void))
142162
// System call args are: func arg stack pthread flags.
143163
TEXT runtime·bsdthread_create(SB),7,$32
@@ -309,3 +329,12 @@ TEXT runtime·setldt(SB),7,$32
309329
XORL AX, AX
310330
MOVW GS, AX
311331
RET
332+
333+
// sysctl: issues call number 202 through INT $0x80 with a zero-size frame,
// so the caller's stacked arguments are used in place.
// Returns 0 when the trap comes back with carry clear; otherwise returns
// the negated AX (presumably -errno; confirm against the kernel ABI).
TEXT runtime·sysctl(SB),7,$0
334+
MOVL $202, AX
335+
INT $0x80
336+
JAE 3(PC) // carry clear: skip the NEGL/RET pair below
337+
NEGL AX
338+
RET
339+
MOVL $0, AX
340+
RET

src/pkg/runtime/darwin/amd64/sys.s

+36-3
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,11 @@ TEXT runtime·sigaction(SB),7,$0
8181

8282
TEXT runtime·sigtramp(SB),7,$64
8383
get_tls(BX)
84-
84+
8585
// save g
8686
MOVQ g(BX), R10
8787
MOVQ R10, 48(SP)
88-
88+
8989
// g = m->gsignal
9090
MOVQ m(BX), BP
9191
MOVQ m_gsignal(BP), BP
@@ -146,6 +146,24 @@ TEXT runtime·sigaltstack(SB),7,$0
146146
CALL runtime·notok(SB)
147147
RET
148148

149+
// usleep(usec): sleeps by calling select with no descriptors and a timeout
// of usec microseconds. The timeval is built in the 16-byte frame.
TEXT runtime·usleep(SB),7,$16
150+
MOVL $0, DX // zero the high half of the DX:AX dividend
151+
MOVL usec+0(FP), AX
152+
MOVL $1000000, CX
153+
DIVL CX // AX = usec / 1000000, DX = usec % 1000000
154+
MOVQ AX, 0(SP) // sec
155+
MOVL DX, 8(SP) // usec
156+
157+
// select(0, 0, 0, 0, &tv)
158+
MOVL $0, DI
159+
MOVL $0, SI
160+
MOVL $0, DX
161+
MOVL $0, R10
162+
MOVQ SP, R8 // &tv: the timeval sits at 0(SP)
163+
MOVL $(0x2000000+93), AX // select is BSD syscall 93 on Darwin (23 is setuid; 23 is select only on Linux/amd64)
164+
SYSCALL
165+
RET
166+
149167
// void bsdthread_create(void *stk, M *m, G *g, void (*fn)(void))
150168
TEXT runtime·bsdthread_create(SB),7,$0
151169
// Set up arguments to bsdthread_create system call.
@@ -189,7 +207,7 @@ TEXT runtime·bsdthread_start(SB),7,$0
189207
POPQ SI
190208
POPQ CX
191209
POPQ DX
192-
210+
193211
get_tls(BX)
194212
MOVQ CX, m(BX)
195213
MOVQ SI, m_procid(CX) // thread port is m->procid
@@ -293,3 +311,18 @@ TEXT runtime·settls(SB),7,$32
293311
MOVL $(0x3000000+3), AX // thread_fast_set_cthread_self - machdep call #3
294312
SYSCALL
295313
RET
314+
315+
// sysctl: copies six values from 8(SP)..48(SP) into DI, SI, DX, R10, R8, R9
// and issues call 0x2000000+202.
// Returns 0 when SYSCALL comes back with carry clear; otherwise returns
// the negated 32-bit AX (presumably -errno; confirm against the kernel ABI).
TEXT runtime·sysctl(SB),7,$0
316+
MOVQ 8(SP), DI
317+
MOVL 16(SP), SI // second value is loaded as 32 bits
318+
MOVQ 24(SP), DX
319+
MOVQ 32(SP), R10
320+
MOVQ 40(SP), R8
321+
MOVQ 48(SP), R9
322+
MOVL $(0x2000000+202), AX // syscall entry
323+
SYSCALL
324+
JCC 3(PC) // carry clear: skip the NEGL/RET pair below
325+
NEGL AX
326+
RET
327+
MOVL $0, AX
328+
RET

src/pkg/runtime/darwin/os.h

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ uint32 runtime·mach_task_self(void);
1818
uint32 runtime·mach_task_self(void);
1919
uint32 runtime·mach_thread_self(void);
2020
uint32 runtime·mach_thread_self(void);
21+
int32 runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
2122

2223
struct Sigaction;
2324
void runtime·sigaction(uintptr, struct Sigaction*, struct Sigaction*);

src/pkg/runtime/darwin/thread.c

+14
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,20 @@ runtime·osinit(void)
148148
if(!runtime·iscgo)
149149
runtime·bsdthread_register();
150150
runtime·destroylock = destroylock;
151+
152+
// Use sysctl to fetch hw.ncpu.
153+
uint32 mib[2];
154+
uint32 out;
155+
int32 ret;
156+
uintptr nout;
157+
158+
mib[0] = 6;
159+
mib[1] = 3;
160+
nout = sizeof out;
161+
out = 0;
162+
ret = runtime·sysctl(mib, 2, (byte*)&out, &nout, nil, 0);
163+
if(ret >= 0)
164+
runtime·ncpu = out;
151165
}
152166

153167
void

src/pkg/runtime/linux/386/sys.s

+25-6
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,25 @@ TEXT runtime·read(SB),7,$0
5252
CALL *runtime·_vdso(SB)
5353
RET
5454

55+
// usleep(usec): sleeps by calling _newselect with no descriptors and a
// timeout of usec microseconds. The timeval is built at 0(SP)/4(SP).
TEXT runtime·usleep(SB),7,$28
56+
MOVL $0, DX // zero the high half of the DX:AX dividend
57+
MOVL usec+0(FP), AX
58+
MOVL $1000000, CX
59+
DIVL CX // AX = usec / 1000000, DX = usec % 1000000
60+
MOVL AX, 0(SP) // tv.tv_sec
61+
MOVL DX, 4(SP) // tv.tv_usec
62+
63+
// select(0, 0, 0, 0, &tv)
64+
// Linux/386 takes system call arguments in BX, CX, DX, SI, DI, not on the stack.
65+
MOVL $142, AX // syscall - _newselect (82 is the old select that takes one struct pointer)
66+
MOVL $0, BX
67+
MOVL $0, CX
68+
MOVL $0, DX
69+
MOVL $0, SI
70+
LEAL 0(SP), DI // &tv
71+
CALL *runtime·_vdso(SB) // same kernel entry the other wrappers in this file use
72+
RET
73+
5574
TEXT runtime·raisesigpipe(SB),7,$12
5675
MOVL $224, AX // syscall - gettid
5776
CALL *runtime·_vdso(SB)
@@ -105,16 +124,16 @@ TEXT runtime·rt_sigaction(SB),7,$0
105124

106125
TEXT runtime·sigtramp(SB),7,$44
107126
get_tls(CX)
108-
127+
109128
// save g
110129
MOVL g(CX), DI
111130
MOVL DI, 20(SP)
112-
131+
113132
// g = m->gsignal
114133
MOVL m(CX), BX
115134
MOVL m_gsignal(BX), BX
116135
MOVL BX, g(CX)
117-
136+
118137
// copy arguments for call to sighandler
119138
MOVL sig+0(FP), BX
120139
MOVL BX, 0(SP)
@@ -125,12 +144,12 @@ TEXT runtime·sigtramp(SB),7,$44
125144
MOVL DI, 12(SP)
126145

127146
CALL runtime·sighandler(SB)
128-
147+
129148
// restore g
130149
get_tls(CX)
131150
MOVL 20(SP), BX
132151
MOVL BX, g(CX)
133-
152+
134153
RET
135154

136155
TEXT runtime·sigignore(SB),7,$0
@@ -202,7 +221,7 @@ TEXT runtime·clone(SB),7,$0
202221
MOVL $1234, 12(CX)
203222

204223
// cannot use CALL *runtime·_vdso(SB) here, because
205-
// the stack changes during the system call (after
224+
// the stack changes during the system call (after
206225
// CALL *runtime·_vdso(SB), the child is still using
207226
// the parent's stack when executing its RET instruction).
208227
INT $0x80

src/pkg/runtime/linux/amd64/sys.s

+20-2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,24 @@ TEXT runtime·read(SB),7,$0-24
5050
SYSCALL
5151
RET
5252

53+
// usleep(usec): splits usec into a seconds/microseconds pair stored at
// 0(SP)/8(SP), then issues the select call described below with that pair
// as the only non-zero argument.
TEXT runtime·usleep(SB),7,$16
54+
MOVL $0, DX // zero the high half of the DX:AX dividend
55+
MOVL usec+0(FP), AX
56+
MOVL $1000000, CX
57+
DIVL CX // AX = usec / 1000000, DX = usec % 1000000
58+
MOVQ AX, 0(SP) // seconds
59+
MOVQ DX, 8(SP) // microseconds
60+
61+
// select(0, 0, 0, 0, &tv)
62+
MOVL $0, DI
63+
MOVL $0, SI
64+
MOVL $0, DX
65+
MOVL $0, R10
66+
MOVQ SP, R8 // &tv: the pair built above sits at 0(SP)
67+
MOVL $23, AX // call number for the select named in the comment above
68+
SYSCALL
69+
RET
70+
5371
TEXT runtime·raisesigpipe(SB),7,$12
5472
MOVL $186, AX // syscall - gettid
5573
SYSCALL
@@ -195,10 +213,10 @@ TEXT runtime·clone(SB),7,$0
195213
CMPQ AX, $0
196214
JEQ 2(PC)
197215
RET
198-
216+
199217
// In child, on new stack.
200218
MOVQ SI, SP
201-
219+
202220
// Initialize m->procid to Linux tid
203221
MOVL $186, AX // gettid
204222
SYSCALL

src/pkg/runtime/linux/arm/sys.s

+20-2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#define SYS_gettid (SYS_BASE + 224)
3434
#define SYS_tkill (SYS_BASE + 238)
3535
#define SYS_sched_yield (SYS_BASE + 158)
36+
#define SYS_select (SYS_BASE + 142) // _newselect; 82 is the obsolete old select, which does not take five register arguments
3637

3738
#define ARM_BASE (SYS_BASE + 0x0f0000)
3839
#define SYS_ARM_cacheflush (ARM_BASE + 2)
@@ -254,7 +255,7 @@ TEXT runtime·sigtramp(SB),7,$24
254255
// save g
255256
MOVW g, R3
256257
MOVW g, 20(R13)
257-
258+
258259
// g = m->gsignal
259260
MOVW m_gsignal(m), g
260261

@@ -265,7 +266,7 @@ TEXT runtime·sigtramp(SB),7,$24
265266
MOVW R3, 16(R13)
266267

267268
BL runtime·sighandler(SB)
268-
269+
269270
// restore g
270271
MOVW 20(R13), g
271272

@@ -285,6 +286,23 @@ TEXT runtime·sigreturn(SB),7,$0
285286
SWI $0
286287
RET
287288

289+
// usleep(usec): sleeps by calling select with no descriptors and a timeout
// of usec microseconds. The timeval is built at 4(SP)/8(SP).
TEXT runtime·usleep(SB),7,$12
290+
MOVW usec+0(FP), R0
291+
MOVW R0, R1 // keep a second copy of usec for the remainder
292+
MOVW $1000000, R2
293+
DIV R2, R0 // R0 = usec / 1000000 (seconds)
294+
MOD R2, R1 // R1 = usec % 1000000 (microseconds)
295+
MOVW R0, 4(SP) // tv.tv_sec
296+
MOVW R1, 8(SP) // tv.tv_usec
297+
MOVW $0, R0 // select(0, 0, 0, 0, &tv)
298+
MOVW $0, R1
299+
MOVW $0, R2
300+
MOVW $0, R3
301+
MOVW $4(SP), R4 // &tv
302+
MOVW $SYS_select, R7
303+
SWI $0
304+
RET
305+
288306
// Use kernel version instead of native armcas in ../../arm.s.
289307
// See ../../../sync/atomic/asm_linux_arm.s for details.
290308
TEXT cas<>(SB),7,$0

src/pkg/runtime/linux/thread.c

+2-5
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
#include "stack.h"
99

1010
extern SigTab runtime·sigtab[];
11-
static int32 proccount;
1211

1312
int32 runtime·open(uint8*, int32, int32);
1413
int32 runtime·close(int32);
@@ -136,13 +135,10 @@ futexlock(Lock *l)
136135
// its wakeup call.
137136
wait = v;
138137

139-
if(proccount == 0)
140-
proccount = getproccount();
141-
142138
// On uniprocessor's, no point spinning.
143139
// On multiprocessors, spin for ACTIVE_SPIN attempts.
144140
spin = 0;
145-
if(proccount > 1)
141+
if(runtime·ncpu > 1)
146142
spin = ACTIVE_SPIN;
147143

148144
for(;;) {
@@ -276,6 +272,7 @@ runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void))
276272
// OS-specific initialization: stores the result of getproccount() in
// runtime·ncpu, which futexlock compares against 1 to decide whether to spin.
void
277273
runtime·osinit(void)
278274
{
275+
runtime·ncpu = getproccount();
279276
}
280277

281278
void

0 commit comments

Comments
 (0)