Skip to content

Use new directives on x86 #3931

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
May 8, 2025
855 changes: 454 additions & 401 deletions backend/amd64/emit.ml

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions backend/arm64/emit.ml
Original file line number Diff line number Diff line change
Expand Up @@ -778,7 +778,7 @@ let emit_literals p align emit_literal =
(* CR sspies: The following section is incorrect. We are in a data section
here. Fix this when cleaning up the section mechanism. *)
D.unsafe_set_internal_section_ref Text);
D.align ~bytes:align;
D.align ~fill_x86_bin_emitter:Nop ~bytes:align;
List.iter emit_literal !p;
p := [])

Expand Down Expand Up @@ -2069,7 +2069,7 @@ let fundecl fundecl =
contains_calls := fundecl.fun_contains_calls;
emit_named_text_section !function_name;
let fun_sym = S.create fundecl.fun_name in
D.align ~bytes:8;
D.align ~fill_x86_bin_emitter:Nop ~bytes:8;
D.global fun_sym;
D.type_symbol ~ty:Function fun_sym;
D.define_symbol_label ~section:Text fun_sym;
Expand Down Expand Up @@ -2130,11 +2130,11 @@ let emit_item (d : Cmm.data_item) =
D.symbol_plus_offset ~offset_in_bytes:(Targetint.of_int o) sym
| Cstring s -> D.string s
| Cskip n -> D.space ~bytes:n
| Calign n -> D.align ~bytes:n
| Calign n -> D.align ~fill_x86_bin_emitter:Zero ~bytes:n

let data l =
D.data ();
D.align ~bytes:8;
D.align ~fill_x86_bin_emitter:Zero ~bytes:8;
List.iter emit_item l

let file_emitter ~file_num ~file_name =
Expand Down Expand Up @@ -2172,7 +2172,7 @@ let begin_assembly _unix =
if macosx
then (
DSL.ins I.NOP [||];
D.align ~bytes:8);
D.align ~fill_x86_bin_emitter:Nop ~bytes:8);
let code_end = Cmm_helpers.make_symbol "code_end" in
Emitaux.Dwarf_helpers.begin_dwarf ~code_begin ~code_end ~file_emitter

Expand All @@ -2190,7 +2190,7 @@ let end_assembly () =
D.global data_end_sym;
D.define_symbol_label ~section:Data data_end_sym;
D.int64 0L;
D.align ~bytes:8;
D.align ~fill_x86_bin_emitter:Zero ~bytes:8;
(* #7887 *)
let frametable = Cmm_helpers.make_symbol "frametable" in
let frametable_sym = S.create frametable in
Expand All @@ -2213,7 +2213,7 @@ let end_assembly () =
(* CR sspies: for some reason, we can get negative numbers here *)
efa_32 = (fun n -> D.int32 n);
efa_word = (fun n -> D.targetint (Targetint.of_int_exn n));
efa_align = (fun n -> D.align ~bytes:n);
efa_align = (fun n -> D.align ~fill_x86_bin_emitter:Zero ~bytes:n);
efa_label_rel =
(fun lbl ofs ->
let lbl = label_to_asm_label ~section:Data lbl in
Expand Down
88 changes: 79 additions & 9 deletions backend/asm_targets/asm_directives_new.ml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ type symbol_type =
| Function
| Object

type align_padding =
| Nop
| Zero

(* CR sspies: We should use the "STT" forms when they are supported as they are
unambiguous across platforms (cf.
https://sourceware.org/binutils/docs/as/Type.html). *)
Expand Down Expand Up @@ -145,10 +149,15 @@ module Directive = struct
| Code
| Machine_width_data

type reloc_type = R_X86_64_PLT32

type comment = string

type t =
| Align of { bytes : int }
| Align of
{ bytes : int;
fill_x86_bin_emitter : align_padding
}
| Bytes of
{ str : string;
comment : string option
Expand Down Expand Up @@ -202,6 +211,14 @@ module Directive = struct
comment : string option
}
| Protected of string
| Hidden of string
| Weak of string
| External of string
| Reloc of
{ offset : Constant.t;
name : reloc_type;
expr : Constant.t
}

let bprintf = Printf.bprintf

Expand Down Expand Up @@ -266,6 +283,8 @@ module Directive = struct
bprintf buf "\t.ascii\t\"%s\""
(string_of_string_literal (String.sub s !i (l - !i)))

let reloc_type_to_string = function R_X86_64_PLT32 -> "R_X86_64_PLT32"

let print_gas buf t =
let gas_comment_opt comment_opt =
if not (emit_comments ())
Expand All @@ -276,7 +295,10 @@ module Directive = struct
| Some comment -> Printf.sprintf "\t/* %s */" comment
in
match t with
| Align { bytes = n } ->
| Align { bytes = n; fill_x86_bin_emitter = _ } ->
(* The flag [fill_x86_bin_emitter] is only relevant for the binary
emitter. On GAS, we can ignore it and just use [.align] in both
cases. *)
(* Some assemblers interpret the integer n as a 2^n alignment and others
as a number of bytes. *)
let n =
Expand Down Expand Up @@ -376,6 +398,14 @@ module Directive = struct
Misc.fatal_error
"Cannot emit [Direct_assignment] except on macOS-like assemblers")
| Protected s -> bprintf buf "\t.protected\t%s" s
| Hidden s -> bprintf buf "\t.hidden\t%s" s
| Weak s -> bprintf buf "\t.weak\t%s" s
(* masm only *)
| External _ -> assert false
| Reloc { offset; name; expr } ->
bprintf buf "\t.reloc\t%a, %s, %a" Constant.print offset
(reloc_type_to_string name)
Constant.print expr

let print_masm buf t =
let unsupported name =
Expand All @@ -390,7 +420,10 @@ module Directive = struct
| Some comment -> Printf.sprintf "\t; %s" comment
in
match t with
| Align { bytes } -> bprintf buf "\tALIGN\t%d" bytes
| Align { bytes; fill_x86_bin_emitter = _ } ->
(* The flag [fill_x86_bin_emitter] is only relevant for the x86 binary
emitter. On MASM, we can ignore it. *)
bprintf buf "\tALIGN\t%d" bytes
| Bytes { str; comment } ->
buf_bytes_directive buf ~directive:"BYTE" str;
bprintf buf "%s" (masm_comment_opt comment)
Expand Down Expand Up @@ -436,6 +469,11 @@ module Directive = struct
| Uleb128 _ -> unsupported "Uleb128"
| Direct_assignment _ -> unsupported "Direct_assignment"
| Protected _ -> unsupported "Protected"
| Hidden _ -> unsupported "Hidden"
| Weak _ -> unsupported "Weak"
| External s -> bprintf buf "\tEXTRN\t%s: NEAR" s
(* The only supported "type" on EXTRN declarations is NEAR. *)
| Reloc _ -> unsupported "Reloc"

let print b t =
match TS.assembler () with
Expand Down Expand Up @@ -480,6 +518,13 @@ let const_variable var = Variable var

let const_int64 i : expr = Signed_int i

let const_with_offset const (offset : int64) =
if Int64.equal offset 0L
then const
else if Int64.compare offset 0L < 0
then Sub (const, Signed_int (Int64.neg offset))
else Add (const, Signed_int offset)

let emit_ref = ref None

let emit (d : Directive.t) =
Expand All @@ -492,7 +537,8 @@ let emit_non_masm (d : Directive.t) =

let section ~names ~flags ~args = emit (Section { names; flags; args })

let align ~bytes = emit (Align { bytes })
let align ~fill_x86_bin_emitter ~bytes =
emit (Align { bytes; fill_x86_bin_emitter })

let should_generate_cfi () =
(* We generate CFI info even if we're not generating any other debugging
Expand Down Expand Up @@ -543,8 +589,16 @@ let indirect_symbol symbol = emit (Indirect_symbol (Asm_symbol.encode symbol))

let private_extern symbol = emit (Private_extern (Asm_symbol.encode symbol))

let extrn symbol = emit (External (Asm_symbol.encode symbol))

let hidden symbol = emit (Hidden (Asm_symbol.encode symbol))

let weak symbol = emit (Weak (Asm_symbol.encode symbol))

let size symbol cst = emit (Size (Asm_symbol.encode symbol, lower_expr cst))

let size_const sym n = emit (Size (Asm_symbol.encode sym, Signed_int n))

let type_ symbol ~type_ = emit (Type (symbol, type_))

let sleb128 ?comment i =
Expand Down Expand Up @@ -621,7 +675,7 @@ let label ?comment label = const_machine_width ?comment (Label label)
let label_plus_offset ?comment lab ~offset_in_bytes =
let offset_in_bytes = Targetint.to_int64 offset_in_bytes in
let lab = const_label lab in
const_machine_width ?comment (const_add lab (const_int64 offset_in_bytes))
const_machine_width ?comment (const_with_offset lab offset_in_bytes)

let define_label label =
let lbl_section = Asm_label.section label in
Expand Down Expand Up @@ -793,7 +847,7 @@ let symbol ?comment sym = const_machine_width ?comment (Symbol sym)

let symbol_plus_offset symbol ~offset_in_bytes =
let offset_in_bytes = Targetint.to_int64 offset_in_bytes in
const_machine_width (Add (Symbol symbol, Signed_int offset_in_bytes))
const_machine_width (const_with_offset (Symbol symbol) offset_in_bytes)

let int8 ?comment i =
const ?comment (Signed_int (Int64.of_int (Int8.to_int i))) Eight
Expand Down Expand Up @@ -884,9 +938,14 @@ let between_labels_16_bit ?comment:_ ~upper:_ ~lower:_ () =
(* CR poechsel: use the arguments *)
Misc.fatal_error "between_labels_16_bit not implemented yet"

let between_labels_32_bit ?comment:_ ~upper:_ ~lower:_ () =
(* CR poechsel: use the arguments *)
Misc.fatal_error "between_labels_32_bit not implemented yet"
let between_labels_32_bit ?comment:_comment ~upper ~lower () =
let expr = const_sub (const_label upper) (const_label lower) in
(* CR sspies: Unlike in most of the other distance computation functions in
this file, we do not force an assembly time constant in this function. This
is to follow the existing/previous implementation of the x86 backend. In
the future, we should investigate whether it would be more appropriate to
force an assembly time constant. *)
const expr Thirty_two

let between_labels_64_bit ?comment:_ ~upper:_ ~lower:_ () =
(* CR poechsel: use the arguments *)
Expand Down Expand Up @@ -1059,3 +1118,14 @@ let offset_into_dwarf_section_symbol ?comment:_comment
match width with
| Thirty_two -> const expr Thirty_two
| Sixty_four -> const expr Sixty_four

let reloc_x86_64_plt32 ~offset_from_this ~target_symbol ~rel_offset_from_next =
emit
(Reloc
{ offset = Sub (This, Signed_int offset_from_this);
name = R_X86_64_PLT32;
expr =
Sub
( Named_thing (Asm_symbol.encode target_symbol),
Signed_int rel_offset_from_next )
})
50 changes: 47 additions & 3 deletions backend/asm_targets/asm_directives_new.mli
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,16 @@ val cfi_def_cfa_register : reg:string -> unit
supported on all platforms. *)
val mark_stack_non_executable : unit -> unit

(** Leave as much space as is required to achieve the given alignment. *)
val align : bytes:int -> unit
type align_padding =
| Nop
| Zero

(** Leave as much space as is required to achieve the given alignment. On x86 in the
binary emitter, it is important what the space is filled with: in the text section,
one would typically fill it with [nop] instructions and in the data section, one
would typically fill it with zeros. This is controlled by the parameter
[fill_x86_bin_emitter]. *)
val align : fill_x86_bin_emitter:align_padding -> bytes:int -> unit

(** Emit a directive giving the displacement between the given symbol and
the current position. This should only be used to state sizes of
Expand All @@ -168,6 +176,8 @@ val align : bytes:int -> unit
from that whose size is being stated (e.g. on POWER with ELF ABI v1). *)
val size : ?size_of:Asm_symbol.t -> Asm_symbol.t -> unit

val size_const : Asm_symbol.t -> int64 -> unit

(** Leave a gap in the object file. *)
val space : bytes:int -> unit

Expand Down Expand Up @@ -197,6 +207,15 @@ val protected : Asm_symbol.t -> unit
details). *)
val private_extern : Asm_symbol.t -> unit

(** Mark an already encoded symbol as external. *)
val extrn : Asm_symbol.t -> unit

(** Mark an already encoded symbol or label as hidden. *)
val hidden : Asm_symbol.t -> unit

(** Mark an already encoded symbol or label as weak. *)
val weak : Asm_symbol.t -> unit

(** Marker inside the definition of a lazy symbol stub (see platform or
assembler documentation for details). *)
val indirect_symbol : Asm_symbol.t -> unit
Expand Down Expand Up @@ -306,6 +325,12 @@ val offset_into_dwarf_section_symbol :
Asm_symbol.t ->
unit

val reloc_x86_64_plt32 :
offset_from_this:int64 ->
target_symbol:Asm_symbol.t ->
rel_offset_from_next:int64 ->
unit

module Directive : sig
module Constant : sig
(* CR sspies: make this private again once the first-class module has been
Expand Down Expand Up @@ -354,6 +379,10 @@ module Directive : sig
removed *)
type comment = string

(* ELF specific *)
type reloc_type = R_X86_64_PLT32
(* X86 only *)

(* CR sspies: make this private again once the first-class module has been
removed *)

Expand All @@ -363,7 +392,14 @@ module Directive : sig
have had all necessary prefixing, mangling, escaping and suffixing
applied. *)
type t =
| Align of { bytes : int }
| Align of
{ bytes : int;
(** The number of bytes to align to. This will be taken log2 by the emitter on
Arm and macOS platforms.*)
fill_x86_bin_emitter : align_padding
(** The [fill_x86_bin_emitter] flag controls whether the x86 binary emitter
emits NOP instructions or null bytes. *)
}
| Bytes of
{ str : string;
comment : string option
Expand Down Expand Up @@ -417,6 +453,14 @@ module Directive : sig
comment : string option
}
| Protected of string
| Hidden of string
| Weak of string
| External of string
| Reloc of
{ offset : Constant.t;
name : reloc_type;
expr : Constant.t
}

(** Translate the given directive to textual form. This produces output
suitable for either gas or MASM as appropriate. *)
Expand Down
7 changes: 7 additions & 0 deletions backend/asm_targets/asm_label.ml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ let create_string section label =
assert (not (contains_escapable_char label));
{ section; label = String label }

let create_string_unchecked section label = { section; label = String label }

let label_prefix =
match Target_system.assembler () with MacOS -> "L" | MASM | GAS_like -> ".L"

Expand Down Expand Up @@ -138,6 +140,7 @@ let for_dwarf_section (dwarf_section : Asm_section.dwarf_section) =
| Debug_str -> Lazy.force debug_str_label
| Debug_line -> Lazy.force debug_line_label

(* CR sspies: Remove the other cases where we never emit a label upfront. *)
let for_section (section : Asm_section.t) =
match section with
| DWARF dwarf_section -> for_dwarf_section dwarf_section
Expand All @@ -147,3 +150,7 @@ let for_section (section : Asm_section.t) =
| Eight_byte_literals -> Lazy.force eight_byte_literals_label
| Sixteen_byte_literals -> Lazy.force sixteen_byte_literals_label
| Jump_tables -> Lazy.force jump_tables_label
| Stapsdt_base -> Misc.fatal_error "Stapsdt_base has no associated label"
| Stapsdt_note -> Misc.fatal_error "Stapsdt_note has no associated label"
| Probes -> Misc.fatal_error "Probes has no associated label"
| Note_ocaml_eh -> Misc.fatal_error "Note_ocaml_eh has no associated label"
3 changes: 3 additions & 0 deletions backend/asm_targets/asm_label.mli
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ val create_int : Asm_section.t -> int -> t
(** Create a textual label. The supplied name must not require escaping. *)
val create_string : Asm_section.t -> string -> t

(** Create a textual label. Argument string is not checked, so use with caution. *)
val create_string_unchecked : Asm_section.t -> string -> t

(** Convert a label to the corresponding textual form, suitable for direct
emission into an assembly file. This may be useful e.g. when emitting an
instruction referencing a label. *)
Expand Down
Loading
Loading