Skip to content

Commit c7a193a

Browse files
committed
GC support for local allocations (#29)
GC support for local allocations * Local arena is reallocated as needed, growing 4x each time (Old arenas currently not freed, but 4x growth means they amount to only 1/3 of the space used by the stack) * Separate arenas per systhread, swapped on context switch * Root scanning distinguishes local and heap roots directly (rather that relying on the page table check) * Simple unidirectional marker for GC, so that dead local allocs do not extend the lifetimes of heap blocks they point to * Debug mode minor heap check: verifies that the minor heap is well-formed and does not contain any pointers to the local arena (even from dead blocks) * Bugfix for caml_alloc_local
1 parent 8dd7270 commit c7a193a

18 files changed

+408
-152
lines changed

asmcomp/cmm_helpers.ml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ let bind_nonvar name arg fn =
3838
| _ -> let id = V.create_local name in Clet(VP.create id, arg, fn (Cvar id))
3939

4040
let caml_black = Nativeint.shift_left (Nativeint.of_int 3) 8
41+
let caml_local = Nativeint.shift_left (Nativeint.of_int 2) 8
4142
(* cf. runtime/caml/gc.h *)
4243

4344
(* Block headers. Meaning of the tag field: see stdlib/obj.ml *)
@@ -51,6 +52,7 @@ let block_header tag sz =
5152
in no-naked-pointers mode. See [caml_darken] and the code below that emits
5253
structured constants and static module definitions. *)
5354
let black_block_header tag sz = Nativeint.logor (block_header tag sz) caml_black
55+
let local_block_header tag sz = Nativeint.logor (block_header tag sz) caml_local
5456
let white_closure_header sz = block_header Obj.closure_tag sz
5557
let black_closure_header sz = black_block_header Obj.closure_tag sz
5658
let infix_header ofs = block_header Obj.infix_tag ofs
@@ -795,8 +797,12 @@ let call_cached_method obj tag cache pos args dbg =
795797

796798
let make_alloc_generic ~mode set_fn dbg tag wordsize args =
797799
if mode = Lambda.Alloc_local || wordsize <= Config.max_young_wosize then
798-
Cop(Calloc mode,
799-
Cconst_natint(block_header tag wordsize, dbg) :: args, dbg)
800+
let hdr =
801+
match mode with
802+
| Lambda.Alloc_local -> local_block_header tag wordsize
803+
| Lambda.Alloc_heap -> block_header tag wordsize
804+
in
805+
Cop(Calloc mode, Cconst_natint(hdr, dbg) :: args, dbg)
800806
else begin
801807
let id = V.create_local "*alloc*" in
802808
let rec fill_fields idx = function

otherlibs/systhreads/st_stubs.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ struct caml_thread_struct {
7979
value * gc_regs; /* Saved value of Caml_state->gc_regs */
8080
char * exception_pointer; /* Saved value of Caml_state->exception_pointer */
8181
struct caml__roots_block * local_roots; /* Saved value of local_roots */
82+
struct caml_local_arenas * local_arenas;
8283
struct longjmp_buffer * exit_buf; /* For thread exit */
8384
#else
8485
value * stack_low; /* The execution stack for this thread */
@@ -148,8 +149,8 @@ static void caml_thread_scan_roots(scanning_action action)
148149
if (th != curr_thread) {
149150
#ifdef NATIVE_CODE
150151
if (th->bottom_of_stack != NULL)
151-
caml_do_local_roots(action, th->bottom_of_stack, th->last_retaddr,
152-
th->gc_regs, th->local_roots);
152+
caml_do_local_roots(action, action, th->bottom_of_stack, th->last_retaddr,
153+
th->gc_regs, th->local_roots, th->local_arenas);
153154
#else
154155
caml_do_local_roots(action, th->sp, th->stack_high, th->local_roots);
155156
#endif
@@ -181,6 +182,7 @@ Caml_inline void caml_thread_save_runtime_state(void)
181182
curr_thread->last_retaddr = Caml_state->last_return_address;
182183
curr_thread->gc_regs = Caml_state->gc_regs;
183184
curr_thread->exception_pointer = Caml_state->exception_pointer;
185+
curr_thread->local_arenas = caml_get_local_arenas();
184186
#else
185187
curr_thread->stack_low = Caml_state->stack_low;
186188
curr_thread->stack_high = Caml_state->stack_high;
@@ -204,6 +206,7 @@ Caml_inline void caml_thread_restore_runtime_state(void)
204206
Caml_state->last_return_address = curr_thread->last_retaddr;
205207
Caml_state->gc_regs = curr_thread->gc_regs;
206208
Caml_state->exception_pointer = curr_thread->exception_pointer;
209+
caml_set_local_arenas(curr_thread->local_arenas);
207210
#else
208211
Caml_state->stack_low = curr_thread->stack_low;
209212
Caml_state->stack_high = curr_thread->stack_high;
@@ -332,6 +335,7 @@ static caml_thread_t caml_thread_new_info(void)
332335
th->last_retaddr = 1;
333336
th->exception_pointer = NULL;
334337
th->local_roots = NULL;
338+
th->local_arenas = NULL;
335339
th->exit_buf = NULL;
336340
#else
337341
/* Allocate the stacks */

runtime/caml/domain_state.tbl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,11 @@ DOMAIN_STATE(struct caml_ephe_ref_table*, ephe_ref_table)
3636
DOMAIN_STATE(struct caml_custom_table*, custom_table)
3737
/* See minor_gc.c */
3838

39+
DOMAIN_STATE(struct caml_local_arenas*, local_arenas)
3940
DOMAIN_STATE(intnat, local_sp)
40-
DOMAIN_STATE(struct region_stack*, local_top)
41+
DOMAIN_STATE(void*, local_top)
4142
DOMAIN_STATE(intnat, local_limit)
43+
4244
DOMAIN_STATE(intnat, local_total)
4345

4446
DOMAIN_STATE(struct mark_stack*, mark_stack)

runtime/caml/gc.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,39 @@
6464
#define Colornum_hd(hd) ((color_t) (((hd) >> 8) & 3))
6565
#define Coloredhd_hd(hd,colnum) (((hd) & ~Caml_black) | ((colnum) << 8))
6666

67+
#ifdef CAML_INTERNALS
68+
69+
70+
#define Init_local_arena_bsize 4096
71+
#ifdef ARCH_SIXTYFOUR
72+
#define Max_local_arenas 10 /* max 4G */
73+
#else
74+
#define Max_local_arenas 8 /* max 1G */
75+
#endif
76+
77+
struct caml_local_arena {
78+
char* base;
79+
uintnat length;
80+
};
81+
typedef struct caml_local_arenas {
82+
int count;
83+
intnat saved_sp;
84+
intnat next_length;
85+
struct caml_local_arena arenas[Max_local_arenas];
86+
} caml_local_arenas;
87+
88+
/* Colors for locally allocated values.
89+
(Only used during root-scanning, never visible to the rest of the GC) */
90+
#define Local_marked Caml_black
91+
#define Local_unmarked Caml_blue /* allocation color of local objects */
92+
#define Local_scanned Caml_gray
93+
94+
#define With_color_hd(hd, color) \
95+
(((hd) & ~Caml_black) | color)
96+
97+
/* Neither a valid header nor value */
98+
#define Local_uninit_hd Make_header(0, 0x42, Local_unmarked)
99+
100+
#endif /* CAML_INTERNALS */
101+
67102
#endif /* CAML_GC_H */

runtime/caml/memory.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,9 @@ extern void caml_alloc_small_dispatch (intnat wosize, int flags,
250250

251251
#define Modify(fp,val) caml_modify((fp), (val))
252252

253+
struct caml_local_arenas* caml_get_local_arenas();
254+
void caml_set_local_arenas(struct caml_local_arenas* s);
255+
253256
#endif /* CAML_INTERNALS */
254257

255258
struct caml__roots_block {

runtime/caml/misc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,9 +398,11 @@ int caml_runtime_warnings_active(void);
398398
#define Debug_free_shrink Debug_tag (0x03)
399399
#define Debug_free_truncate Debug_tag (0x04)
400400
#define Debug_free_unused Debug_tag (0x05)
401+
#define Debug_free_local Debug_tag (0x06)
401402
#define Debug_uninit_minor Debug_tag (0x10)
402403
#define Debug_uninit_major Debug_tag (0x11)
403404
#define Debug_uninit_align Debug_tag (0x15)
405+
#define Debug_uninit_local Debug_tag (0x16)
404406
#define Debug_filler_align Debug_tag (0x85)
405407
#define Debug_pool_magic Debug_tag (0x99)
406408

runtime/caml/roots.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,11 @@ CAMLextern void caml_do_local_roots_byt (scanning_action, value *, value *,
3434
#define caml_do_local_roots caml_do_local_roots_byt
3535
#else
3636
CAMLextern void caml_do_local_roots_nat (
37-
scanning_action f, char * c_bottom_of_stack,
37+
scanning_action maj, scanning_action min,
38+
char * c_bottom_of_stack,
3839
uintnat last_retaddr, value * v_gc_regs,
39-
struct caml__roots_block * gc_local_roots);
40+
struct caml__roots_block * gc_local_roots,
41+
struct caml_local_arenas* local_arenas);
4042
#define caml_do_local_roots caml_do_local_roots_nat
4143
#endif
4244

runtime/caml/stack.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,11 @@
4444
#else
4545
#error "TARGET_power: wrong MODEL"
4646
#endif
47+
/* FIXME: Already_scanned optimisation not supported on this branch
4748
#define Already_scanned(sp, retaddr) ((retaddr) & 1)
4849
#define Mask_already_scanned(retaddr) ((retaddr) & ~1)
4950
#define Mark_scanned(sp, retaddr) Saved_return_address(sp) = (retaddr) | 1
51+
*/
5052
#endif
5153

5254
#ifdef TARGET_s390x

runtime/memory.c

Lines changed: 81 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "caml/signals.h"
3535
#include "caml/memprof.h"
3636
#include "caml/eventlog.h"
37+
#include "caml/alloc.h"
3738

3839
int caml_huge_fallback_count = 0;
3940
/* Number of times that mmapping big pages fails and we fell back to small
@@ -676,11 +677,6 @@ CAMLexport CAMLweakdef void caml_modify (value *fp, value val)
676677
}
677678
}
678679

679-
struct region_stack {
680-
char* base;
681-
struct region_stack* next;
682-
};
683-
684680
CAMLexport intnat caml_local_region_begin()
685681
{
686682
return Caml_state->local_sp;
@@ -691,37 +687,98 @@ CAMLexport void caml_local_region_end(intnat reg)
691687
Caml_state->local_sp = reg;
692688
}
693689

694-
//#define Local_init_wsz 64
695-
#define Local_init_wsz (4096)
696-
void caml_local_realloc()
690+
CAMLexport caml_local_arenas* caml_get_local_arenas()
691+
{
692+
caml_local_arenas* s = Caml_state->local_arenas;
693+
if (s != NULL)
694+
s->saved_sp = Caml_state->local_sp;
695+
return s;
696+
}
697+
698+
CAMLexport void caml_set_local_arenas(caml_local_arenas* s)
697699
{
698-
intnat new_bsize;
699-
struct region_stack* stk;
700-
char* stkbase;
701-
if (Caml_state->local_top == NULL) {
702-
new_bsize = Bsize_wsize(Local_init_wsz);
700+
Caml_state->local_arenas = s;
701+
if (s != NULL) {
702+
struct caml_local_arena a = s->arenas[s->count - 1];
703+
Caml_state->local_sp = s->saved_sp;
704+
Caml_state->local_top = (void*)(a.base + a.length);
705+
Caml_state->local_limit = - a.length;
703706
} else {
704-
CAMLassert((char*)Caml_state->local_top + Caml_state->local_limit == Caml_state->local_top->base);
705-
new_bsize = -Caml_state->local_limit;
707+
Caml_state->local_sp = 0;
708+
Caml_state->local_top = NULL;
709+
Caml_state->local_limit = 0;
706710
}
707-
while (Caml_state->local_sp < -new_bsize) new_bsize *= 2;
708-
stkbase = caml_stat_alloc(new_bsize + sizeof(struct region_stack));
709-
stk = (struct region_stack*)(stkbase + new_bsize);
710-
memset(stkbase, 0x42, new_bsize); /* FIXME debugging only */
711-
stk->base = stkbase;
712-
stk->next = Caml_state->local_top;
713-
Caml_state->local_top = stk;
714-
Caml_state->local_limit = -new_bsize;
711+
}
712+
713+
void caml_local_realloc()
714+
{
715+
caml_local_arenas* s = caml_get_local_arenas();
716+
intnat i;
717+
char* arena;
718+
if (s == NULL) {
719+
s = caml_stat_alloc(sizeof(*s));
720+
s->count = 0;
721+
s->next_length = 0;
722+
s->saved_sp = Caml_state->local_sp;
723+
}
724+
if (s->count == Max_local_arenas)
725+
caml_fatal_error("Local allocation stack overflow - exceeded Max_local_arenas");
726+
727+
do {
728+
if (s->next_length == 0) {
729+
s->next_length = Init_local_arena_bsize;
730+
} else {
731+
/* overflow check */
732+
CAML_STATIC_ASSERT(((intnat)Init_local_arena_bsize << (2*Max_local_arenas)) > 0);
733+
s->next_length *= 4;
734+
}
735+
/* may need to loop, if a very large allocation was requested */
736+
} while (s->saved_sp + s->next_length < 0);
737+
738+
arena = caml_stat_alloc_noexc(s->next_length);
739+
if (arena == NULL)
740+
caml_fatal_error("Local allocation stack overflow - out of memory");
741+
#ifdef DEBUG
742+
for (i = 0; i < s->next_length; i += sizeof(value)) {
743+
*((header_t*)(arena + i)) = Debug_uninit_local;
744+
}
745+
#endif
746+
for (i = s->saved_sp; i < 0; i += sizeof(value)) {
747+
*((header_t*)(arena + s->next_length + i)) = Local_uninit_hd;
748+
}
749+
caml_gc_message(0x08,
750+
"Growing local stack to %"ARCH_INTNAT_PRINTF_FORMAT"d kB\n",
751+
s->next_length / 1024);
752+
s->count++;
753+
s->arenas[s->count-1].length = s->next_length;
754+
s->arenas[s->count-1].base = arena;
755+
caml_set_local_arenas(s);
756+
CAMLassert(Caml_state->local_limit <= Caml_state->local_sp);
715757
}
716758

717759
CAMLexport value caml_alloc_local(mlsize_t wosize, tag_t tag)
718760
{
761+
#ifdef NATIVE_CODE
719762
intnat sp = Caml_state->local_sp;
763+
header_t* hp;
720764
sp -= Bhsize_wosize(wosize);
721765
Caml_state->local_sp = sp;
722766
if (sp < Caml_state->local_limit)
723767
caml_local_realloc();
724-
return Val_hp((char*)Caml_state->local_top + sp);
768+
hp = (header_t*)((char*)Caml_state->local_top + sp);
769+
*hp = Make_header(wosize, tag, Local_unmarked);
770+
return Val_hp(hp);
771+
#else
772+
if (wosize <= Max_young_wosize) {
773+
return caml_alloc_small(wosize, tag);
774+
} else {
775+
/* The return value is initialised directly using Field.
776+
This is invalid if it may create major -> minor pointers.
777+
So, perform a minor GC to prevent this. (See caml_make_vect) */
778+
caml_minor_collection();
779+
return caml_alloc_shr(wosize, tag);
780+
}
781+
#endif
725782
}
726783

727784
/* Global memory pool.

runtime/minor_gc.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,36 @@ void caml_oldify_mopup (void)
348348
if (redo) goto again;
349349
}
350350

351+
#ifdef DEBUG
352+
static void verify_minor_heap()
353+
{
354+
header_t* p;
355+
struct caml_local_arena* arena = Caml_state->local_arenas ?
356+
&Caml_state->local_arenas->arenas[Caml_state->local_arenas->count-1] : NULL;
357+
for (p = (header_t*)Caml_state->young_ptr;
358+
p < (header_t*)Caml_state->young_alloc_end;
359+
p += Whsize_hp(p)) {
360+
header_t hd = *p;
361+
CAMLassert_young_header(hd);
362+
if (Tag_hd(hd) < No_scan_tag) {
363+
intnat i = 0;
364+
if (Tag_hd(hd) == Closure_tag)
365+
i = Start_env_closinfo(Closinfo_val(Val_hp(p)));
366+
for (; i < Wosize_hd(hd); i++) {
367+
value v = Field(Val_hp(p), i);
368+
if (Is_block(v)) {
369+
if (Is_young(v)) CAMLassert ((value)Caml_state->young_ptr < v);
370+
if (arena) {
371+
CAMLassert(!(arena->base <= (char*)v &&
372+
(char*)v < arena->base + arena->length));
373+
}
374+
}
375+
}
376+
}
377+
}
378+
}
379+
#endif
380+
351381
/* Make sure the minor heap is empty by performing a minor collection
352382
if needed.
353383
*/
@@ -360,6 +390,9 @@ void caml_empty_minor_heap (void)
360390

361391
if (Caml_state->young_ptr != Caml_state->young_alloc_end){
362392
CAMLassert_young_header(*(header_t*)Caml_state->young_ptr);
393+
#ifdef DEBUG
394+
verify_minor_heap();
395+
#endif
363396
if (caml_minor_gc_begin_hook != NULL) (*caml_minor_gc_begin_hook) ();
364397
prev_alloc_words = caml_allocated_words;
365398
Caml_state->in_minor_collection = 1;

0 commit comments

Comments
 (0)