Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the unstable option to reduce the binary size of dynamic library… #118636

Merged
merged 1 commit into from
Jan 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions compiler/rustc_session/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 347,7 @@ impl SwitchWithOptPath {
pub enum SymbolManglingVersion {
Legacy,
V0,
Hashed,
}

#[derive(Clone, Copy, Debug, PartialEq, Hash)]
Expand Down Expand Up @@ -2692,6 2693,7 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
match cg.symbol_mangling_version {
// Stable values:
None | Some(SymbolManglingVersion::V0) => {}

// Unstable values:
Some(SymbolManglingVersion::Legacy) => {
if !unstable_opts.unstable_options {
Expand All @@ -2700,6 2702,13 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
);
}
}
Some(SymbolManglingVersion::Hashed) => {
if !unstable_opts.unstable_options {
early_dcx.early_fatal(
"`-C symbol-mangling-version=hashed` requires `-Z unstable-options`",
);
}
}
michaelwoerister marked this conversation as resolved.
Show resolved Hide resolved
}

// Check for unstable values of `-C instrument-coverage`.
Expand Down Expand Up @@ -2741,6 2750,12 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
);
}
Some(SymbolManglingVersion::V0) => {}
Some(SymbolManglingVersion::Hashed) => {
early_dcx.early_warn(
"-C instrument-coverage requires symbol mangling version `v0`, \
but `-C symbol-mangling-version=hashed` was specified",
);
}
}
}

Expand Down
6 changes: 4 additions & 2 deletions compiler/rustc_session/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 407,8 @@ mod desc {
pub const parse_switch_with_opt_path: &str =
"an optional path to the profiling data output directory";
pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`";
pub const parse_symbol_mangling_version: &str = "either `legacy` or `v0` (RFC 2603)";
pub const parse_symbol_mangling_version: &str =
"one of: `legacy`, `v0` (RFC 2603), or `hashed`";
pub const parse_src_file_hash: &str = "either `md5` or `sha1`";
pub const parse_relocation_model: &str =
"one of supported relocation models (`rustc --print relocation-models`)";
Expand Down Expand Up @@ -1180,6 1181,7 @@ mod parse {
*slot = match v {
Some("legacy") => Some(SymbolManglingVersion::Legacy),
Some("v0") => Some(SymbolManglingVersion::V0),
Some("hashed") => Some(SymbolManglingVersion::Hashed),
michaelwoerister marked this conversation as resolved.
Show resolved Hide resolved
_ => return false,
};
true
Expand Down Expand Up @@ -1504,7 1506,7 @@ options! {
"tell the linker which information to strip (`none` (default), `debuginfo` or `symbols`)"),
symbol_mangling_version: Option<SymbolManglingVersion> = (None,
parse_symbol_mangling_version, [TRACKED],
"which mangling version to use for symbol names ('legacy' (default) or 'v0')"),
"which mangling version to use for symbol names ('legacy' (default), 'v0', or 'hashed')"),
target_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
"select target processor (`rustc --print target-cpus` for details)"),
target_feature: String = (String::new(), parse_target_feature, [TRACKED],
Expand Down
43 changes: 43 additions & 0 deletions compiler/rustc_symbol_mangling/src/hashed.rs
Original file line number Diff line number Diff line change
@@ -0,0 1,43 @@
use crate::v0;
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
use rustc_hir::def_id::CrateNum;
use rustc_middle::ty::{Instance, TyCtxt};

use std::fmt::Write;

/// Builds the `hashed` symbol name for `instance`.
///
/// The result is the prefix `_RNxC`, followed by the name of the owning crate
/// encoded with `v0::push_ident`, followed by a stable 64-bit hash of the
/// string returned by `full_mangling_name`, encoded by `push_hash64`.
///
/// `full_mangling_name` is invoked exactly once, inside the stable hashing
/// context, to obtain the string that gets hashed.
pub(super) fn mangle<'tcx>(
    tcx: TyCtxt<'tcx>,
    instance: Instance<'tcx>,
    instantiating_crate: Option<CrateNum>,
    full_mangling_name: impl FnOnce() -> String,
) -> String {
    // The symbol of a generic function may end up in several downstream dylibs.
    // If that symbol still carried the name of the defining crate, a hash
    // collision between the generic function and another symbol of the same
    // crate could not be detected at build time and would only surface at run
    // time. Using the name of the instantiating crate instead of the defining
    // crate eliminates that risk, so the instantiating crate takes precedence
    // whenever one is given.
    let owning_crate = instantiating_crate.unwrap_or_else(|| instance.def_id().krate);

    let mut mangled = String::from("_RNxC");
    v0::push_ident(tcx.crate_name(owning_crate).as_str(), &mut mangled);

    // Hash the full mangled name with the stable hasher and keep 64 bits of it.
    let name_hash = tcx.with_stable_hashing_context(|mut hcx| {
        let mut hasher = StableHasher::new();
        full_mangling_name().hash_stable(&mut hcx, &mut hasher);
        hasher.finish::<Hash64>().as_u64()
    });
    push_hash64(name_hash, &mut mangled);

    mangled
}

/// Appends `hash` to `output` as a length-prefixed identifier of the form
/// `<len>H<digits>`.
///
/// `<digits>` is the base-62 encoding produced by `v0::encode_integer_62`
/// with its final `_` terminator removed, because the terminator does not help
/// prevent hash collisions. `<len>` is the length of the encoding *including*
/// that terminator, which equals the length of `H<digits>`.
fn push_hash64(hash: u64, output: &mut String) {
    let encoded = v0::encode_integer_62(hash);
    let prefixed_len = encoded.len();
    // Everything except the trailing `_`.
    let digits = &encoded[..prefixed_len - 1];
    let _ = write!(output, "{prefixed_len}H{digits}");
}
4 changes: 4 additions & 0 deletions compiler/rustc_symbol_mangling/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 111,7 @@ use rustc_middle::query::Providers;
use rustc_middle::ty::{self, Instance, TyCtxt};
use rustc_session::config::SymbolManglingVersion;

mod hashed;
mod legacy;
mod v0;

Expand Down Expand Up @@ -265,6 266,9 @@ fn compute_symbol_name<'tcx>(
let symbol = match mangling_version {
SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate),
SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate),
SymbolManglingVersion::Hashed => hashed::mangle(tcx, instance, instantiating_crate, || {
v0::mangle(tcx, instance, instantiating_crate)
}),
};

debug_assert!(
Expand Down
104 changes: 61 additions & 43 deletions compiler/rustc_symbol_mangling/src/v0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 116,7 @@ impl<'tcx> SymbolMangler<'tcx> {
/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
fn push_integer_62(&mut self, x: u64) {
if let Some(x) = x.checked_sub(1) {
base_n::push_str(x as u128, 62, &mut self.out);
}
self.push("_");
push_integer_62(x, &mut self.out)
}

/// Push a `tag`-prefixed base 62 integer, when larger than `0`, that is:
Expand All @@ -138,45 135,7 @@ impl<'tcx> SymbolMangler<'tcx> {
}

fn push_ident(&mut self, ident: &str) {
let mut use_punycode = false;
for b in ident.bytes() {
match b {
b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
0x80..=0xff => use_punycode = true,
_ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
}
}

let punycode_string;
let ident = if use_punycode {
self.push("u");

// FIXME(eddyb) we should probably roll our own punycode implementation.
let mut punycode_bytes = match punycode::encode(ident) {
Ok(s) => s.into_bytes(),
Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
};

// Replace `-` with `_`.
if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
*c = b'_';
}

// FIXME(eddyb) avoid rechecking UTF-8 validity.
punycode_string = String::from_utf8(punycode_bytes).unwrap();
&punycode_string
} else {
ident
};

let _ = write!(self.out, "{}", ident.len());

// Write a separating `_` if necessary (leading digit or `_`).
if let Some('_' | '0'..='9') = ident.chars().next() {
self.push("_");
}

self.push(ident);
push_ident(ident, &mut self.out)
}

fn path_append_ns(
Expand Down Expand Up @@ -836,3 795,62 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
Ok(())
}
}
/// Append `x` to `output` as a `_`-terminated base 62 integer, i.e. in the
/// format the RFC calls `<base-62-number>`:
/// * `x = 0` produces only the `"_"` terminator
/// * `x > 0` produces `x - 1` written in base 62 and then `"_"`,
///   e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
pub(crate) fn push_integer_62(x: u64, output: &mut String) {
    // Zero contributes no digits at all; every other value is shifted down by one.
    if x != 0 {
        base_n::push_str(u128::from(x - 1), 62, output);
    }
    output.push('_');
}

/// Returns the encoding that `push_integer_62` produces for `x` as a freshly
/// allocated `String` (the returned string always ends with the `_` terminator).
pub(crate) fn encode_integer_62(x: u64) -> String {
    let mut encoded = String::new();
    push_integer_62(x, &mut encoded);
    encoded
}

/// Appends `ident` to `output` as a length-prefixed identifier.
///
/// * An identifier made only of `_`, ASCII letters and ASCII digits is written
///   as its byte length followed by the identifier itself.
/// * An identifier containing any byte `>= 0x80` is written as `u`, followed by
///   the length of its punycode form (with the last `-` turned into `_`) and
///   then that punycode form.
/// * In both cases a separating `_` is inserted between the length and the
///   text when the text starts with a digit or `_`.
///
/// Any other byte in `ident` triggers `bug!`.
pub(crate) fn push_ident(ident: &str, output: &mut String) {
    // Validate every byte; a single non-ASCII byte switches to punycode.
    let mut needs_punycode = false;
    for byte in ident.bytes() {
        if byte >= 0x80 {
            needs_punycode = true;
        } else if byte != b'_' && !byte.is_ascii_alphanumeric() {
            bug!("symbol_names: bad byte {} in ident {:?}", byte, ident);
        }
    }

    // Owns the punycode text (when used) so that `text` can borrow from it.
    let encoded;
    let text: &str = if needs_punycode {
        output.push('u');

        // FIXME(eddyb) we should probably roll our own punycode implementation.
        let mut bytes = match punycode::encode(ident) {
            Ok(s) => s.into_bytes(),
            Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
        };

        // Only the last `-` is replaced with `_`.
        if let Some(last_dash) = bytes.iter().rposition(|&c| c == b'-') {
            bytes[last_dash] = b'_';
        }

        // FIXME(eddyb) avoid rechecking UTF-8 validity.
        encoded = String::from_utf8(bytes).unwrap();
        encoded.as_str()
    } else {
        ident
    };

    let _ = write!(output, "{}", text.len());

    // A leading digit or `_` would be ambiguous right after the length, so
    // separate the two with an extra `_`.
    if matches!(text.as_bytes().first(), Some(b'_' | b'0'..=b'9')) {
        output.push('_');
    }

    output.push_str(text);
}
48 changes: 48 additions & 0 deletions tests/run-make/symbol-mangling-hashed/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 1,48 @@
include ../tools.mk

# ignore-cross-compile
# only-linux
# only-x86_64

# Default symbol-listing command and artifact names; the Darwin and Windows
# sections below override them.
# NOTE(review): RLIB_NAME is assigned in every section but is not referenced by
# any recipe line in this file - confirm whether it is still needed.
NM=nm -D
RLIB_NAME=liba_rlib.rlib
DYLIB_NAME=liba_dylib.so
SO_NAME=libb_dylib.so
BIN_NAME=b_bin

# Overrides used when UNAME is Darwin.
ifeq ($(UNAME),Darwin)
NM=nm -gU
RLIB_NAME=liba_rlib.rlib
DYLIB_NAME=liba_dylib.dylib
SO_NAME=libb_dylib.dylib
BIN_NAME=b_bin
endif

# Overrides used when IS_WINDOWS is defined.
ifdef IS_WINDOWS
NM=nm -g
RLIB_NAME=liba_rlib.dll.a
DYLIB_NAME=liba_dylib.dll
SO_NAME=libb_dylib.dll
BIN_NAME=b_bin.exe
endif

# Build a_dylib and a_rlib with `-C symbol-mangling-version=hashed` (which needs
# `-Z unstable-options`), then build b_dylib and b_bin against them without that
# option.
all:
$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=foo a_dylib.rs
$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=bar a_rlib.rs
$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_dylib.rs
$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_bin.rs

# Check hashed symbol name

# a_dylib: no listed symbol may contain `hello`, and exactly one ` T ` symbol
# must carry the hashed prefix `_RNxC7a_dylib`.
[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep -c hello)" -eq "0" ]
[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep _RNxC7a_dylib | grep -c ' T ')" -eq "1" ]

# b_dylib: exactly one `b_dylib` symbol containing `hello`, one ` T ` symbol with
# the `_RNxC6a_rlib` prefix, and one ` U ` symbol with the `_RNxC7a_dylib` prefix.
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep b_dylib | grep -c hello)" -eq "1" ]
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC6a_rlib | grep -c ' T ')" -eq "1" ]
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]

# b_bin: exactly one ` U ` symbol for each of a_rlib's hashed name, a_dylib's
# hashed name, and b_dylib's `hello`.
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC6a_rlib | grep -c ' U ')" -eq "1" ]
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep b_dylib | grep hello | grep -c ' U ')" -eq "1" ]

# Finally run the binary.
$(call RUN,$(BIN_NAME))
4 changes: 4 additions & 0 deletions tests/run-make/symbol-mangling-hashed/a_dylib.rs
Original file line number Diff line number Diff line change
@@ -0,0 1,4 @@
#![crate_type="dylib"]
/// Prints the line `hello dylib` to standard output.
pub fn hello() {
println!("hello dylib");
}
5 changes: 5 additions & 0 deletions tests/run-make/symbol-mangling-hashed/a_rlib.rs
Original file line number Diff line number Diff line change
@@ -0,0 1,5 @@
#![crate_type="rlib"]

/// Prints the line `hello rlib` to standard output.
pub fn hello() {
println!("hello rlib");
}
9 changes: 9 additions & 0 deletions tests/run-make/symbol-mangling-hashed/b_bin.rs
Original file line number Diff line number Diff line change
@@ -0,0 1,9 @@
extern crate a_rlib;
extern crate a_dylib;
extern crate b_dylib;

// Calls `hello` from each of the three linked crates, in order: `a_rlib`,
// `a_dylib`, then `b_dylib`.
fn main() {
a_rlib::hello();
a_dylib::hello();
b_dylib::hello();
}
9 changes: 9 additions & 0 deletions tests/run-make/symbol-mangling-hashed/b_dylib.rs
Original file line number Diff line number Diff line change
@@ -0,0 1,9 @@
#![crate_type="dylib"]

extern crate a_rlib;
extern crate a_dylib;

/// Calls `a_rlib::hello` and then `a_dylib::hello`.
pub fn hello() {
a_rlib::hello();
a_dylib::hello();
}
2 changes: 1 addition & 1 deletion tests/ui/symbol-mangling-version/bad-value.bad.stderr
Original file line number Diff line number Diff line change
@@ -1,2 1,2 @@
error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected

2 changes: 1 addition & 1 deletion tests/ui/symbol-mangling-version/bad-value.blank.stderr
Original file line number Diff line number Diff line change
@@ -1,2 1,2 @@
error: incorrect value `` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
error: incorrect value `` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected

2 changes: 1 addition & 1 deletion tests/ui/symbol-mangling-version/bad-value.no-value.stderr
Original file line number Diff line number Diff line change
@@ -1,2 1,2 @@
error: codegen option `symbol-mangling-version` requires either `legacy` or `v0` (RFC 2603) (C symbol-mangling-version=<value>)
error: codegen option `symbol-mangling-version` requires one of: `legacy`, `v0` (RFC 2603), or `hashed` (C symbol-mangling-version=<value>)

2 changes: 2 additions & 0 deletions tests/ui/symbol-mangling-version/unstable.hashed.stderr
Original file line number Diff line number Diff line change
@@ -0,0 1,2 @@
error: `-C symbol-mangling-version=hashed` requires `-Z unstable-options`

5 changes: 4 additions & 1 deletion tests/ui/symbol-mangling-version/unstable.rs
Original file line number Diff line number Diff line change
@@ -1,6 1,9 @@
// revisions: legacy legacy-ok
// revisions: legacy legacy-ok hashed hashed-ok
// [legacy] compile-flags: -Csymbol-mangling-version=legacy
// [legacy-ok] check-pass
// [legacy-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=legacy
// [hashed] compile-flags: -Csymbol-mangling-version=hashed
// [hashed-ok] check-pass
// [hashed-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=hashed

fn main() {}
Loading