Skip to content

Commit 52b835c

Browse files
committed
Store metadata separately in rlib files
Right now whenever an rlib file is linked against, all of the metadata from the rlib is pulled into the final staticlib or binary. The reason for this is that the metadata is currently stored in a section of the object file. Note that this is intentional for dynamic libraries in order to distribute metadata bundled with static libraries. This commit alters the situation for rlib libraries to instead store the metadata in a separate file in the archive. In doing so, when the archive is passed to the linker, none of the metadata will get pulled into the resulting executable. Furthermore, the metadata file is skipped when assembling rlibs into an archive. The snag in this implementation comes with multiple output formats. When generating a dylib, the metadata needs to be in the object file, but when generating an rlib it needs to be in a separate file. In order to accomplish this, the metadata variable is inserted into an entirely separate LLVM Module which is then codegen'd into a different location (foo.metadata.o). This is then linked into dynamic libraries and silently ignored for rlib files. While changing how metadata is inserted into archives, I have also stopped compressing metadata when inserted into rlib files. We have wanted to stop compressing metadata, but the sections it creates in object files are apparently too large. Thankfully if it's just an arbitrary file it doesn't matter how large it is. I have seen massive reductions in executable sizes, as well as staticlib output sizes (to confirm that this is all working).
1 parent 4e0cb31 commit 52b835c

File tree

7 files changed

+156
-78
lines changed

7 files changed

+156
-78
lines changed

src/librustc/back/archive.rs

+11-3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ use std::str;
2020
use extra::tempfile::TempDir;
2121
use syntax::abi;
2222

23+
pub static METADATA_FILENAME: &'static str = "metadata";
24+
2325
pub struct Archive {
2426
priv sess: Session,
2527
priv dst: Path,
@@ -81,17 +83,22 @@ impl Archive {
8183
/// search in the relevant locations for a library named `name`.
8284
pub fn add_native_library(&mut self, name: &str) {
8385
let location = self.find_library(name);
84-
self.add_archive(&location, name);
86+
self.add_archive(&location, name, []);
8587
}
8688

8789
/// Adds all of the contents of the rlib at the specified path to this
8890
/// archive.
8991
pub fn add_rlib(&mut self, rlib: &Path) {
9092
let name = rlib.filename_str().unwrap().split('-').next().unwrap();
91-
self.add_archive(rlib, name);
93+
self.add_archive(rlib, name, [METADATA_FILENAME]);
94+
}
95+
96+
/// Adds an arbitrary file to this archive
97+
pub fn add_file(&mut self, file: &Path) {
98+
run_ar(self.sess, "r", None, [&self.dst, file]);
9299
}
93100

94-
fn add_archive(&mut self, archive: &Path, name: &str) {
101+
fn add_archive(&mut self, archive: &Path, name: &str, skip: &[&str]) {
95102
let loc = TempDir::new("rsar").unwrap();
96103

97104
// First, extract the contents of the archive to a temporary directory
@@ -106,6 +113,7 @@ impl Archive {
106113
let mut inputs = ~[];
107114
for file in files.iter() {
108115
let filename = file.filename_str().unwrap();
116+
if skip.iter().any(|s| *s == filename) { continue }
109117
let filename = format!("r-{}-{}", name, filename);
110118
let new_filename = file.with_filename(filename);
111119
fs::rename(file, &new_filename);

src/librustc/back/link.rs

+96-27
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@
99
// except according to those terms.
1010

1111

12-
use back::archive::Archive;
12+
use back::archive::{Archive, METADATA_FILENAME};
1313
use back::rpath;
14+
use driver::driver::CrateTranslation;
1415
use driver::session::Session;
1516
use driver::session;
1617
use lib::llvm::llvm;
@@ -88,10 +89,11 @@ pub mod write {
8889
use back::link::{output_type_assembly, output_type_bitcode};
8990
use back::link::{output_type_exe, output_type_llvm_assembly};
9091
use back::link::{output_type_object};
92+
use driver::driver::CrateTranslation;
9193
use driver::session::Session;
9294
use driver::session;
9395
use lib::llvm::llvm;
94-
use lib::llvm::{ModuleRef, ContextRef};
96+
use lib::llvm::ModuleRef;
9597
use lib;
9698

9799
use std::c_str::ToCStr;
@@ -101,10 +103,11 @@ pub mod write {
101103
use std::str;
102104

103105
pub fn run_passes(sess: Session,
104-
llcx: ContextRef,
105-
llmod: ModuleRef,
106+
trans: &CrateTranslation,
106107
output_type: output_type,
107108
output: &Path) {
109+
let llmod = trans.module;
110+
let llcx = trans.context;
108111
unsafe {
109112
llvm::LLVMInitializePasses();
110113

@@ -204,12 +207,23 @@ pub mod write {
204207
})
205208
}
206209

207-
// Create a codegen-specific pass manager to emit the actual
208-
// assembly or object files. This may not end up getting used,
209-
// but we make it anyway for good measure.
210-
let cpm = llvm::LLVMCreatePassManager();
211-
llvm::LLVMRustAddAnalysisPasses(tm, cpm, llmod);
212-
llvm::LLVMRustAddLibraryInfo(cpm, llmod);
210+
// A codegen-specific pass manager is used to generate object
211+
// files for an LLVM module.
212+
//
213+
// Apparently each of these pass managers is a one-shot kind of
214+
// thing, so we create a new one for each type of output. The
215+
// pass manager passed to the closure should be ensured to not
216+
// escape the closure itself, and the manager should only be
217+
// used once.
218+
fn with_codegen(tm: TargetMachineRef, llmod: ModuleRef,
219+
f: |PassManagerRef|) {
220+
let cpm = llvm::LLVMCreatePassManager();
221+
llvm::LLVMRustAddAnalysisPasses(tm, cpm, llmod);
222+
llvm::LLVMRustAddLibraryInfo(cpm, llmod);
223+
f(cpm);
224+
llvm::LLVMDisposePassManager(cpm);
225+
226+
}
213227

214228
match output_type {
215229
output_type_none => {}
@@ -220,20 +234,47 @@ pub mod write {
220234
}
221235
output_type_llvm_assembly => {
222236
output.with_c_str(|output| {
223-
llvm::LLVMRustPrintModule(cpm, llmod, output)
237+
with_codegen(tm, llmod, |cpm| {
238+
llvm::LLVMRustPrintModule(cpm, llmod, output);
239+
})
224240
})
225241
}
226242
output_type_assembly => {
227-
WriteOutputFile(sess, tm, cpm, llmod, output, lib::llvm::AssemblyFile);
243+
with_codegen(tm, llmod, |cpm| {
244+
WriteOutputFile(sess, tm, cpm, llmod, output,
245+
lib::llvm::AssemblyFile);
246+
});
247+
248+
// windows will invoke this function with an assembly output
249+
// type when it's actually generating an object file. This
250+
// is because g++ is used to compile the assembly instead of
251+
// having LLVM directly output an object file. Regardless,
252+
// in this case, we're going to possibly need a metadata
253+
// file.
254+
if sess.opts.output_type != output_type_assembly {
255+
with_codegen(tm, trans.metadata_module, |cpm| {
256+
let out = output.with_extension("metadata.o");
257+
WriteOutputFile(sess, tm, cpm,
258+
trans.metadata_module, &out,
259+
lib::llvm::ObjectFile);
260+
})
261+
}
228262
}
229263
output_type_exe | output_type_object => {
230-
WriteOutputFile(sess, tm, cpm, llmod, output, lib::llvm::ObjectFile);
264+
with_codegen(tm, llmod, |cpm| {
265+
WriteOutputFile(sess, tm, cpm, llmod, output,
266+
lib::llvm::ObjectFile);
267+
});
268+
with_codegen(tm, trans.metadata_module, |cpm| {
269+
WriteOutputFile(sess, tm, cpm, trans.metadata_module,
270+
&output.with_extension("metadata.o"),
271+
lib::llvm::ObjectFile);
272+
})
231273
}
232274
}
233275

234-
llvm::LLVMDisposePassManager(cpm);
235-
236276
llvm::LLVMRustDisposeTargetMachine(tm);
277+
llvm::LLVMDisposeModule(trans.metadata_module);
237278
llvm::LLVMDisposeModule(llmod);
238279
llvm::LLVMContextDispose(llcx);
239280
if sess.time_llvm_passes() { llvm::LLVMRustPrintPassTimings(); }
@@ -782,10 +823,9 @@ pub fn get_cc_prog(sess: Session) -> ~str {
782823
/// Perform the linkage portion of the compilation phase. This will generate all
783824
/// of the requested outputs for this compilation session.
784825
pub fn link_binary(sess: Session,
785-
crate_types: &[~str],
826+
trans: &CrateTranslation,
786827
obj_filename: &Path,
787-
out_filename: &Path,
788-
lm: LinkMeta) {
828+
out_filename: &Path) {
789829
let outputs = if sess.opts.test {
790830
// If we're generating a test executable, then ignore all other output
791831
// styles at all other locations
@@ -795,7 +835,7 @@ pub fn link_binary(sess: Session,
795835
// look at what was in the crate file itself for generating output
796836
// formats.
797837
let mut outputs = sess.opts.outputs.clone();
798-
for ty in crate_types.iter() {
838+
for ty in trans.crate_types.iter() {
799839
if "bin" == *ty {
800840
outputs.push(session::OutputExecutable);
801841
} else if "dylib" == *ty || "lib" == *ty {
@@ -813,12 +853,13 @@ pub fn link_binary(sess: Session,
813853
};
814854

815855
for output in outputs.move_iter() {
816-
link_binary_output(sess, output, obj_filename, out_filename, lm);
856+
link_binary_output(sess, trans, output, obj_filename, out_filename);
817857
}
818858

819-
// Remove the temporary object file if we aren't saving temps
859+
// Remove the temporary object file and metadata if we aren't saving temps
820860
if !sess.opts.save_temps {
821861
fs::unlink(obj_filename);
862+
fs::unlink(&obj_filename.with_extension("metadata.o"));
822863
}
823864
}
824865

@@ -832,11 +873,11 @@ fn is_writeable(p: &Path) -> bool {
832873
}
833874

834875
fn link_binary_output(sess: Session,
876+
trans: &CrateTranslation,
835877
output: session::OutputStyle,
836878
obj_filename: &Path,
837-
out_filename: &Path,
838-
lm: LinkMeta) {
839-
let libname = output_lib_filename(lm);
879+
out_filename: &Path) {
880+
let libname = output_lib_filename(trans.link);
840881
let out_filename = match output {
841882
session::OutputRlib => {
842883
out_filename.with_filename(format!("lib{}.rlib", libname))
@@ -874,7 +915,7 @@ fn link_binary_output(sess: Session,
874915

875916
match output {
876917
session::OutputRlib => {
877-
link_rlib(sess, obj_filename, &out_filename);
918+
link_rlib(sess, Some(trans), obj_filename, &out_filename);
878919
}
879920
session::OutputStaticlib => {
880921
link_staticlib(sess, obj_filename, &out_filename);
@@ -894,9 +935,25 @@ fn link_binary_output(sess: Session,
894935
// rlib primarily contains the object file of the crate, but it also contains
895936
// all of the object files from native libraries. This is done by unzipping
896937
// native libraries and inserting all of the contents into this archive.
897-
fn link_rlib(sess: Session, obj_filename: &Path,
938+
//
939+
// Rather than putting the metadata in an object file section, rlibs
940+
// contain the metadata in a separate file.
941+
fn link_rlib(sess: Session,
942+
trans: Option<&CrateTranslation>, // None == no metadata
943+
obj_filename: &Path,
898944
out_filename: &Path) -> Archive {
899945
let mut a = Archive::create(sess, out_filename, obj_filename);
946+
947+
match trans {
948+
Some(trans) => {
949+
let metadata = obj_filename.with_filename(METADATA_FILENAME);
950+
fs::File::create(&metadata).write(trans.metadata);
951+
a.add_file(&metadata);
952+
fs::unlink(&metadata);
953+
}
954+
None => {}
955+
}
956+
900957
for &(ref l, kind) in cstore::get_used_libraries(sess.cstore).iter() {
901958
match kind {
902959
cstore::NativeStatic => {
@@ -916,8 +973,12 @@ fn link_rlib(sess: Session, obj_filename: &Path,
916973
//
917974
// Additionally, there's no way for us to link dynamic libraries, so we warn
918975
// about all dynamic library dependencies that they're not linked in.
976+
//
977+
// There's no need to include metadata in a static archive, so make sure not to
978+
// link in the metadata object file (and also don't prepare the archive with a
979+
// metadata file).
919980
fn link_staticlib(sess: Session, obj_filename: &Path, out_filename: &Path) {
920-
let mut a = link_rlib(sess, obj_filename, out_filename);
981+
let mut a = link_rlib(sess, None, obj_filename, out_filename);
921982
a.add_native_library("morestack");
922983

923984
let crates = cstore::get_used_crates(sess.cstore, cstore::RequireStatic);
@@ -998,6 +1059,14 @@ fn link_args(sess: Session,
9981059
~"-o", out_filename.as_str().unwrap().to_owned(),
9991060
obj_filename.as_str().unwrap().to_owned()]);
10001061

1062+
// When linking a dynamic library, we put the metadata into a section of the
1063+
// executable. This metadata is in a separate object file from the main
1064+
// object file, so we link that in here.
1065+
if dylib {
1066+
let metadata = obj_filename.with_extension("metadata.o");
1067+
args.push(metadata.as_str().unwrap().to_owned());
1068+
}
1069+
10011070
if sess.targ_cfg.os == abi::OsLinux {
10021071
// GNU-style linkers will use this to omit linking to libraries which
10031072
// don't actually fulfill any relocations, but only for libraries which

src/librustc/driver/driver.rs

+6-7
Original file line numberDiff line numberDiff line change
@@ -335,8 +335,10 @@ pub fn phase_3_run_analysis_passes(sess: Session,
335335
pub struct CrateTranslation {
336336
context: ContextRef,
337337
module: ModuleRef,
338+
metadata_module: ModuleRef,
338339
link: LinkMeta,
339340
crate_types: ~[~str],
341+
metadata: ~[u8],
340342
}
341343

342344
/// Run the translation phase to LLVM, after which the AST and analysis can
@@ -362,8 +364,7 @@ pub fn phase_5_run_llvm_passes(sess: Session,
362364

363365
time(sess.time_passes(), "LLVM passes", (), |_|
364366
link::write::run_passes(sess,
365-
trans.context,
366-
trans.module,
367+
trans,
367368
output_type,
368369
&asm_filename));
369370

@@ -376,8 +377,7 @@ pub fn phase_5_run_llvm_passes(sess: Session,
376377
} else {
377378
time(sess.time_passes(), "LLVM passes", (), |_|
378379
link::write::run_passes(sess,
379-
trans.context,
380-
trans.module,
380+
trans,
381381
sess.opts.output_type,
382382
&outputs.obj_filename));
383383
}
@@ -390,10 +390,9 @@ pub fn phase_6_link_output(sess: Session,
390390
outputs: &OutputFilenames) {
391391
time(sess.time_passes(), "linking", (), |_|
392392
link::link_binary(sess,
393-
trans.crate_types,
393+
trans,
394394
&outputs.obj_filename,
395-
&outputs.out_filename,
396-
trans.link));
395+
&outputs.out_filename));
397396
}
398397

399398
pub fn stop_after_phase_3(sess: Session) -> bool {

src/librustc/metadata/encoder.rs

+6-8
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,14 @@ use middle::ty;
2121
use middle::typeck;
2222
use middle;
2323

24+
use std::cast;
2425
use std::hashmap::{HashMap, HashSet};
25-
use std::io::{Writer, Seek, Decorator};
2626
use std::io::mem::MemWriter;
27+
use std::io::{Writer, Seek, Decorator};
2728
use std::str;
29+
use std::util;
2830
use std::vec;
2931

30-
use extra::flate;
3132
use extra::serialize::Encodable;
3233
use extra;
3334

@@ -47,8 +48,6 @@ use syntax::parse::token;
4748
use syntax;
4849
use writer = extra::ebml::writer;
4950

50-
use std::cast;
51-
5251
// used by astencode:
5352
type abbrev_map = @mut HashMap<ty::t, tyencode::ty_abbrev>;
5453

@@ -1871,10 +1870,9 @@ pub fn encode_metadata(parms: EncodeParams, crate: &Crate) -> ~[u8] {
18711870
// remaining % 4 bytes.
18721871
wr.write(&[0u8, 0u8, 0u8, 0u8]);
18731872

1874-
let writer_bytes: &mut ~[u8] = wr.inner_mut_ref();
1875-
1876-
metadata_encoding_version.to_owned() +
1877-
flate::deflate_bytes(*writer_bytes)
1873+
// This is a horrible thing to do to the outer MemWriter, but thankfully we
1874+
// don't use it again so... it's ok right?
1875+
return util::replace(wr.inner_mut_ref(), ~[]);
18781876
}
18791877

18801878
// Get the encoded string for a type

0 commit comments

Comments
 (0)