Skip to content

Commit 66a06b9

Browse files
authored
Use new directives on x86 (#3931)
1 parent 54d6467 commit 66a06b9

11 files changed

+659
-457
lines changed

backend/amd64/emit.ml

Lines changed: 454 additions & 401 deletions
Large diffs are not rendered by default.

backend/arm64/emit.ml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -778,7 +778,7 @@ let emit_literals p align emit_literal =
778778
(* CR sspies: The following section is incorrect. We are in a data section
779779
here. Fix this when cleaning up the section mechanism. *)
780780
D.unsafe_set_internal_section_ref Text);
781-
D.align ~bytes:align;
781+
D.align ~fill_x86_bin_emitter:Nop ~bytes:align;
782782
List.iter emit_literal !p;
783783
p := [])
784784

@@ -2069,7 +2069,7 @@ let fundecl fundecl =
20692069
contains_calls := fundecl.fun_contains_calls;
20702070
emit_named_text_section !function_name;
20712071
let fun_sym = S.create fundecl.fun_name in
2072-
D.align ~bytes:8;
2072+
D.align ~fill_x86_bin_emitter:Nop ~bytes:8;
20732073
D.global fun_sym;
20742074
D.type_symbol ~ty:Function fun_sym;
20752075
D.define_symbol_label ~section:Text fun_sym;
@@ -2130,11 +2130,11 @@ let emit_item (d : Cmm.data_item) =
21302130
D.symbol_plus_offset ~offset_in_bytes:(Targetint.of_int o) sym
21312131
| Cstring s -> D.string s
21322132
| Cskip n -> D.space ~bytes:n
2133-
| Calign n -> D.align ~bytes:n
2133+
| Calign n -> D.align ~fill_x86_bin_emitter:Zero ~bytes:n
21342134

21352135
let data l =
21362136
D.data ();
2137-
D.align ~bytes:8;
2137+
D.align ~fill_x86_bin_emitter:Zero ~bytes:8;
21382138
List.iter emit_item l
21392139

21402140
let file_emitter ~file_num ~file_name =
@@ -2172,7 +2172,7 @@ let begin_assembly _unix =
21722172
if macosx
21732173
then (
21742174
DSL.ins I.NOP [||];
2175-
D.align ~bytes:8);
2175+
D.align ~fill_x86_bin_emitter:Nop ~bytes:8);
21762176
let code_end = Cmm_helpers.make_symbol "code_end" in
21772177
Emitaux.Dwarf_helpers.begin_dwarf ~code_begin ~code_end ~file_emitter
21782178

@@ -2190,7 +2190,7 @@ let end_assembly () =
21902190
D.global data_end_sym;
21912191
D.define_symbol_label ~section:Data data_end_sym;
21922192
D.int64 0L;
2193-
D.align ~bytes:8;
2193+
D.align ~fill_x86_bin_emitter:Zero ~bytes:8;
21942194
(* #7887 *)
21952195
let frametable = Cmm_helpers.make_symbol "frametable" in
21962196
let frametable_sym = S.create frametable in
@@ -2213,7 +2213,7 @@ let end_assembly () =
22132213
(* CR sspies: for some reason, we can get negative numbers here *)
22142214
efa_32 = (fun n -> D.int32 n);
22152215
efa_word = (fun n -> D.targetint (Targetint.of_int_exn n));
2216-
efa_align = (fun n -> D.align ~bytes:n);
2216+
efa_align = (fun n -> D.align ~fill_x86_bin_emitter:Zero ~bytes:n);
22172217
efa_label_rel =
22182218
(fun lbl ofs ->
22192219
let lbl = label_to_asm_label ~section:Data lbl in

backend/asm_targets/asm_directives_new.ml

Lines changed: 79 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ type symbol_type =
6060
| Function
6161
| Object
6262

63+
(* Padding used to fill the gap created by an alignment directive: [Nop]
   instructions or [Zero] bytes. Only the x86 binary emitter distinguishes the
   two; the textual GAS and MASM printers in this file ignore the choice. *)
type align_padding =
64+
| Nop
65+
| Zero
66+
6367
(* CR sspies: We should use the "STT" forms when they are supported as they are
6468
unambiguous across platforms (cf.
6569
https://sourceware.org/binutils/docs/as/Type.html). *)
@@ -145,10 +149,15 @@ module Directive = struct
145149
| Code
146150
| Machine_width_data
147151

152+
(* Relocation types that can be named in a [Reloc] directive. *)
type reloc_type = R_X86_64_PLT32
153+
148154
type comment = string
149155

150156
type t =
151-
| Align of { bytes : int }
157+
| Align of
158+
{ bytes : int;
159+
fill_x86_bin_emitter : align_padding
160+
}
152161
| Bytes of
153162
{ str : string;
154163
comment : string option
@@ -202,6 +211,14 @@ module Directive = struct
202211
comment : string option
203212
}
204213
| Protected of string
214+
| Hidden of string
215+
| Weak of string
216+
| External of string
217+
| Reloc of
218+
{ offset : Constant.t;
219+
name : reloc_type;
220+
expr : Constant.t
221+
}
205222

206223
let bprintf = Printf.bprintf
207224

@@ -266,6 +283,8 @@ module Directive = struct
266283
bprintf buf "\t.ascii\t\"%s\""
267284
(string_of_string_literal (String.sub s !i (l - !i)))
268285

286+
(* Textual name of a relocation type, as printed in a GAS [.reloc]
   directive. *)
let reloc_type_to_string reloc =
  match reloc with R_X86_64_PLT32 -> "R_X86_64_PLT32"
287+
269288
let print_gas buf t =
270289
let gas_comment_opt comment_opt =
271290
if not (emit_comments ())
@@ -276,7 +295,10 @@ module Directive = struct
276295
| Some comment -> Printf.sprintf "\t/* %s */" comment
277296
in
278297
match t with
279-
| Align { bytes = n } ->
298+
| Align { bytes = n; fill_x86_bin_emitter = _ } ->
299+
(* The flag [fill_x86_bin_emitter] is only relevant for the binary
300+
emitter. On GAS, we can ignore it and just use [.align] in both
301+
cases. *)
280302
(* Some assemblers interpret the integer n as a 2^n alignment and others
281303
as a number of bytes. *)
282304
let n =
@@ -376,6 +398,14 @@ module Directive = struct
376398
Misc.fatal_error
377399
"Cannot emit [Direct_assignment] except on macOS-like assemblers")
378400
| Protected s -> bprintf buf "\t.protected\t%s" s
401+
| Hidden s -> bprintf buf "\t.hidden\t%s" s
402+
| Weak s -> bprintf buf "\t.weak\t%s" s
403+
(* masm only *)
404+
| External _ -> assert false
405+
| Reloc { offset; name; expr } ->
406+
bprintf buf "\t.reloc\t%a, %s, %a" Constant.print offset
407+
(reloc_type_to_string name)
408+
Constant.print expr
379409

380410
let print_masm buf t =
381411
let unsupported name =
@@ -390,7 +420,10 @@ module Directive = struct
390420
| Some comment -> Printf.sprintf "\t; %s" comment
391421
in
392422
match t with
393-
| Align { bytes } -> bprintf buf "\tALIGN\t%d" bytes
423+
| Align { bytes; fill_x86_bin_emitter = _ } ->
424+
(* The flag [fill_x86_bin_emitter] is only relevant for the x86 binary
425+
emitter. On MASM, we can ignore it. *)
426+
bprintf buf "\tALIGN\t%d" bytes
394427
| Bytes { str; comment } ->
395428
buf_bytes_directive buf ~directive:"BYTE" str;
396429
bprintf buf "%s" (masm_comment_opt comment)
@@ -436,6 +469,11 @@ module Directive = struct
436469
| Uleb128 _ -> unsupported "Uleb128"
437470
| Direct_assignment _ -> unsupported "Direct_assignment"
438471
| Protected _ -> unsupported "Protected"
472+
| Hidden _ -> unsupported "Hidden"
473+
| Weak _ -> unsupported "Weak"
474+
| External s -> bprintf buf "\tEXTRN\t%s: NEAR" s
475+
(* The only supported "type" on EXTRN declarations is NEAR. *)
476+
| Reloc _ -> unsupported "Reloc"
439477

440478
let print b t =
441479
match TS.assembler () with
@@ -480,6 +518,13 @@ let const_variable var = Variable var
480518

481519
let const_int64 i : expr = Signed_int i
482520

521+
(* [const_with_offset const offset] builds the expression for [const] displaced
   by [offset]. A zero offset returns [const] unchanged; a negative offset is
   expressed as a subtraction of its negation, [Sub (const, -offset)]; a
   positive offset is expressed as [Add (const, offset)]. *)
let const_with_offset const (offset : int64) =
522+
if Int64.equal offset 0L
523+
then const
524+
else if Int64.compare offset 0L < 0
525+
then Sub (const, Signed_int (Int64.neg offset))
526+
else Add (const, Signed_int offset)
527+
483528
let emit_ref = ref None
484529

485530
let emit (d : Directive.t) =
@@ -492,7 +537,8 @@ let emit_non_masm (d : Directive.t) =
492537

493538
let section ~names ~flags ~args = emit (Section { names; flags; args })
494539

495-
let align ~bytes = emit (Align { bytes })
540+
let align ~fill_x86_bin_emitter ~bytes =
541+
emit (Align { bytes; fill_x86_bin_emitter })
496542

497543
let should_generate_cfi () =
498544
(* We generate CFI info even if we're not generating any other debugging
@@ -543,8 +589,16 @@ let indirect_symbol symbol = emit (Indirect_symbol (Asm_symbol.encode symbol))
543589

544590
let private_extern symbol = emit (Private_extern (Asm_symbol.encode symbol))
545591

592+
(* Declare [symbol] as external. The [External] directive is rendered only by
   the MASM printer (as [EXTRN <name>: NEAR]); the GAS printer does not handle
   it. *)
let extrn symbol =
  let encoded = Asm_symbol.encode symbol in
  emit (External encoded)
593+
594+
(* Emit a [Hidden] directive for [symbol], printed as [.hidden] by the GAS
   printer. The MASM printer reports this directive as unsupported. *)
let hidden symbol =
  let encoded = Asm_symbol.encode symbol in
  emit (Hidden encoded)
595+
596+
(* Emit a [Weak] directive for [symbol], printed as [.weak] by the GAS printer.
   The MASM printer reports this directive as unsupported. *)
let weak symbol =
  let encoded = Asm_symbol.encode symbol in
  emit (Weak encoded)
597+
546598
let size symbol cst = emit (Size (Asm_symbol.encode symbol, lower_expr cst))
547599

600+
(* Emit a [Size] directive giving the size of [sym] as the literal constant
   [n], rather than as an expression computed from labels. *)
let size_const sym n =
  let encoded = Asm_symbol.encode sym in
  emit (Size (encoded, Signed_int n))
601+
548602
let type_ symbol ~type_ = emit (Type (symbol, type_))
549603

550604
let sleb128 ?comment i =
@@ -621,7 +675,7 @@ let label ?comment label = const_machine_width ?comment (Label label)
621675
let label_plus_offset ?comment lab ~offset_in_bytes =
622676
let offset_in_bytes = Targetint.to_int64 offset_in_bytes in
623677
let lab = const_label lab in
624-
const_machine_width ?comment (const_add lab (const_int64 offset_in_bytes))
678+
const_machine_width ?comment (const_with_offset lab offset_in_bytes)
625679

626680
let define_label label =
627681
let lbl_section = Asm_label.section label in
@@ -793,7 +847,7 @@ let symbol ?comment sym = const_machine_width ?comment (Symbol sym)
793847

794848
let symbol_plus_offset symbol ~offset_in_bytes =
795849
let offset_in_bytes = Targetint.to_int64 offset_in_bytes in
796-
const_machine_width (Add (Symbol symbol, Signed_int offset_in_bytes))
850+
const_machine_width (const_with_offset (Symbol symbol) offset_in_bytes)
797851

798852
let int8 ?comment i =
799853
const ?comment (Signed_int (Int64.of_int (Int8.to_int i))) Eight
@@ -884,9 +938,14 @@ let between_labels_16_bit ?comment:_ ~upper:_ ~lower:_ () =
884938
(* CR poechsel: use the arguments *)
885939
Misc.fatal_error "between_labels_16_bit not implemented yet"
886940

887-
let between_labels_32_bit ?comment:_ ~upper:_ ~lower:_ () =
888-
(* CR poechsel: use the arguments *)
889-
Misc.fatal_error "between_labels_32_bit not implemented yet"
941+
let between_labels_32_bit ?comment:_comment ~upper ~lower () =
942+
let expr = const_sub (const_label upper) (const_label lower) in
943+
(* CR sspies: Unlike in most of the other distance computation functions in
944+
this file, we do not force an assembly time constant in this function. This
945+
is to follow the existing/previous implementation of the x86 backend. In
946+
the future, we should investigate whether it would be more appropriate to
947+
force an assembly time constant. *)
948+
const expr Thirty_two
890949

891950
let between_labels_64_bit ?comment:_ ~upper:_ ~lower:_ () =
892951
(* CR poechsel: use the arguments *)
@@ -1059,3 +1118,14 @@ let offset_into_dwarf_section_symbol ?comment:_comment
10591118
match width with
10601119
| Thirty_two -> const expr Thirty_two
10611120
| Sixty_four -> const expr Sixty_four
1121+
1122+
(* Emit a [Reloc] directive of type [R_X86_64_PLT32]. The relocation offset is
   [This - offset_from_this] and the relocated expression is
   [target_symbol - rel_offset_from_next], with [target_symbol] encoded via
   [Asm_symbol.encode].
   NOTE(review): [This] presumably denotes the current assembly position --
   confirm against the definition of [Constant.t]. *)
let reloc_x86_64_plt32 ~offset_from_this ~target_symbol ~rel_offset_from_next =
1123+
emit
1124+
(Reloc
1125+
{ offset = Sub (This, Signed_int offset_from_this);
1126+
name = R_X86_64_PLT32;
1127+
expr =
1128+
Sub
1129+
( Named_thing (Asm_symbol.encode target_symbol),
1130+
Signed_int rel_offset_from_next )
1131+
})

backend/asm_targets/asm_directives_new.mli

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,16 @@ val cfi_def_cfa_register : reg:string -> unit
158158
supported on all platforms. *)
159159
val mark_stack_non_executable : unit -> unit
160160

161-
(** Leave as much space as is required to achieve the given alignment. *)
162-
val align : bytes:int -> unit
161+
(** The kind of padding used to fill an alignment gap: [Nop] instructions or
    [Zero] bytes. See [align]. *)
type align_padding =
162+
| Nop
163+
| Zero
164+
165+
(** Leave as much space as is required to achieve the given alignment. On x86 in the
166+
binary emitter, it is important what the space is filled with: in the text section,
167+
one would typically fill it with [nop] instructions and in the data section, one
168+
would typically fill it with zeros. This is controlled by the parameter
169+
[fill_x86_bin_emitter]. *)
170+
val align : fill_x86_bin_emitter:align_padding -> bytes:int -> unit
163171

164172
(** Emit a directive giving the displacement between the given symbol and
165173
the current position. This should only be used to state sizes of
@@ -168,6 +176,8 @@ val align : bytes:int -> unit
168176
from that whose size is being stated (e.g. on POWER with ELF ABI v1). *)
169177
val size : ?size_of:Asm_symbol.t -> Asm_symbol.t -> unit
170178

179+
(** Emit a size directive stating that the size of the given symbol is the
    given constant number. *)
val size_const : Asm_symbol.t -> int64 -> unit
180+
171181
(** Leave a gap in the object file. *)
172182
val space : bytes:int -> unit
173183

@@ -197,6 +207,15 @@ val protected : Asm_symbol.t -> unit
197207
details). *)
198208
val private_extern : Asm_symbol.t -> unit
199209

210+
(** Mark an already encoded symbol as external. *)
211+
val extrn : Asm_symbol.t -> unit
212+
213+
(** Mark an already encoded symbol or label as hidden. *)
214+
val hidden : Asm_symbol.t -> unit
215+
216+
(** Mark an already encoded symbol or label as weak. *)
217+
val weak : Asm_symbol.t -> unit
218+
200219
(** Marker inside the definition of a lazy symbol stub (see platform or
201220
assembler documentation for details). *)
202221
val indirect_symbol : Asm_symbol.t -> unit
@@ -306,6 +325,12 @@ val offset_into_dwarf_section_symbol :
306325
Asm_symbol.t ->
307326
unit
308327

328+
(** Emit a relocation directive of type [R_X86_64_PLT32]. The relocation is
    placed [offset_from_this] before the current position expression and
    refers to [target_symbol] minus [rel_offset_from_next]. The GAS printer
    renders it as a [.reloc] directive; the MASM printer reports it as
    unsupported. *)
val reloc_x86_64_plt32 :
329+
offset_from_this:int64 ->
330+
target_symbol:Asm_symbol.t ->
331+
rel_offset_from_next:int64 ->
332+
unit
333+
309334
module Directive : sig
310335
module Constant : sig
311336
(* CR sspies: make this private again once the first-class module has been
@@ -354,6 +379,10 @@ module Directive : sig
354379
removed *)
355380
type comment = string
356381

382+
(* ELF specific *)
383+
type reloc_type = R_X86_64_PLT32
384+
(* X86 only *)
385+
357386
(* CR sspies: make this private again once the first-class module has been
358387
removed *)
359388

@@ -363,7 +392,14 @@ module Directive : sig
363392
have had all necessary prefixing, mangling, escaping and suffixing
364393
applied. *)
365394
type t =
366-
| Align of { bytes : int }
395+
| Align of
396+
{ bytes : int;
397+
(** The number of bytes to align to. The emitter converts this to its base-2
398+
logarithm on Arm and macOS platforms. *)
399+
fill_x86_bin_emitter : align_padding
400+
(** The [fill_x86_bin_emitter] flag controls whether the x86 binary emitter
401+
emits NOP instructions or null bytes. *)
402+
}
367403
| Bytes of
368404
{ str : string;
369405
comment : string option
@@ -417,6 +453,14 @@ module Directive : sig
417453
comment : string option
418454
}
419455
| Protected of string
456+
| Hidden of string
457+
| Weak of string
458+
| External of string
459+
| Reloc of
460+
{ offset : Constant.t;
461+
name : reloc_type;
462+
expr : Constant.t
463+
}
420464

421465
(** Translate the given directive to textual form. This produces output
422466
suitable for either gas or MASM as appropriate. *)

backend/asm_targets/asm_label.ml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ let create_string section label =
6969
assert (not (contains_escapable_char label));
7070
{ section; label = String label }
7171

72+
(* Like [create_string], but without asserting that the name is free of
   characters that would require escaping. *)
let create_string_unchecked section name = { section; label = String name }
73+
7274
let label_prefix =
7375
match Target_system.assembler () with MacOS -> "L" | MASM | GAS_like -> ".L"
7476

@@ -138,6 +140,7 @@ let for_dwarf_section (dwarf_section : Asm_section.dwarf_section) =
138140
| Debug_str -> Lazy.force debug_str_label
139141
| Debug_line -> Lazy.force debug_line_label
140142

143+
(* CR sspies: Remove the other cases where we never emit a label upfront. *)
141144
let for_section (section : Asm_section.t) =
142145
match section with
143146
| DWARF dwarf_section -> for_dwarf_section dwarf_section
@@ -147,3 +150,7 @@ let for_section (section : Asm_section.t) =
147150
| Eight_byte_literals -> Lazy.force eight_byte_literals_label
148151
| Sixteen_byte_literals -> Lazy.force sixteen_byte_literals_label
149152
| Jump_tables -> Lazy.force jump_tables_label
153+
| Stapsdt_base -> Misc.fatal_error "Stapsdt_base has no associated label"
154+
| Stapsdt_note -> Misc.fatal_error "Stapsdt_note has no associated label"
155+
| Probes -> Misc.fatal_error "Probes has no associated label"
156+
| Note_ocaml_eh -> Misc.fatal_error "Note_ocaml_eh has no associated label"

backend/asm_targets/asm_label.mli

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ val create_int : Asm_section.t -> int -> t
5252
(** Create a textual label. The supplied name must not require escaping. *)
5353
val create_string : Asm_section.t -> string -> t
5454

55+
(** Create a textual label. Unlike [create_string], the supplied name is not
    checked for characters that require escaping, so use with caution. *)
56+
val create_string_unchecked : Asm_section.t -> string -> t
57+
5558
(** Convert a label to the corresponding textual form, suitable for direct
5659
emission into an assembly file. This may be useful e.g. when emitting an
5760
instruction referencing a label. *)

0 commit comments

Comments
 (0)