Auto merge of #12881 - epage:intern, r=ehuss

refactor(toml): Decouple parsing from interning system ### What does this PR try to resolve? To have a separate manifest API (#12801), we can't rely on interning because it might be used in longer-lifetime applications. To keep this limited in scope, this only removes `InternedString` from manifest parsing. Everything else still uses `InternedString`. ### How should we test and review this PR? I had problems getting the cargo benchmarks running, so I did a quick and dirty benchmark that is end-to-end, covering fresh builds, clean builds, and resolution. I ran these against a fresh clone of cargo's code base. See [my comment](#12801 (comment)) for the script that managed the benchmarks. Benchmarks: <details> ```console $ ../dump/cargo-12801-bench.rs run Finished dev [unoptimized debuginfo] target(s) in 0.07s Running `target/debug/cargo -Zscript -Zmsrv-policy ../dump/cargo-12801-bench.rs run` warning: `package.edition` is unspecified, defaulting to `2021` Finished dev [unoptimized debuginfo] target(s) in 0.04s Running `/home/epage/.cargo/target/0a/7f4c1ab500f045/debug/cargo-12801-bench run` $ hyperfine "../cargo-old check" "../cargo-new check" Benchmark 1: ../cargo-old check Time (mean ± σ): 119.3 ms ± 3.2 ms [User: 98.6 ms, System: 20.3 ms] Range (min … max): 115.6 ms … 124.3 ms 24 runs Benchmark 2: ../cargo-new check Time (mean ± σ): 119.4 ms ± 2.4 ms [User: 98.0 ms, System: 21.1 ms] Range (min … max): 115.7 ms … 123.6 ms 24 runs Summary ../cargo-old check ran 1.00 ± 0.03 times faster than ../cargo-new check $ hyperfine --prepare "cargo clean" "../cargo-old check" "../cargo-new check" Benchmark 1: ../cargo-old check Time (mean ± σ): 20.249 s ± 0.392 s [User: 157.719 s, System: 22.771 s] Range (min … max): 19.605 s … 21.123 s 10 runs Benchmark 2: ../cargo-new check Time (mean ± σ): 20.123 s ± 0.212 s [User: 156.156 s, System: 22.325 s] Range (min … max): 19.764 s … 20.420 s 10 runs Summary ../cargo-new check ran 1.01 ± 0.02 times faster than ../cargo-old check $ hyperfine --prepare "cargo clean && rm -f Cargo.lock" "../cargo-old check" "../cargo-new check" Benchmark 1: ../cargo-old check Time (mean ± σ): 21.105 s ± 0.465 s [User: 156.482 s, System: 22.799 s] Range (min … max): 20.156 s … 22.010 s 10 runs Benchmark 2: ../cargo-new check Time (mean ± σ): 21.358 s ± 0.538 s [User: 156.187 s, System: 22.979 s] Range (min … max): 20.703 s … 22.462 s 10 runs Summary ../cargo-old check ran 1.01 ± 0.03 times faster than ../cargo-new check ``` </details> ### Additional information I consulted https://github.com/rosetta-rs/string-rosetta-rs when deciding on what string type to use for performance. Originally, I hoped to entirely replacing string interning. For that, I was looking at `arcstr` as it had a fast equality operator. However, that is only helpful so long as the two strings we are comparing came from the original source. Unsure how likely that is to happen (and daunted by converting all of the `Copy`s into `Clone`s), I decided to scale back. Concerned about all of the small allocations when parsing a manifest, I assumed I'd need a string type with small-string optimizations, like `hipstr`, `compact_str`, `flexstr`, and `ecow`. The first three give us more wiggle room and `hipstr` was the fastest of them, so I went with that. I then double checked macro benchmarks, and realized `hipstr` made no difference and switched to `String` to keep things simple / with lower dependencies. When doing this, I had created a type alias (`TomlStr`) for the string type so I could more easily swap it out if needed (and not have to always write out a lifetime). With just using `String`, I went ahead and dropped that.
rust-lang · Oct 28, 2023 · d1830f6 · d1830f6
2 parents 708383d acc52f3
commit d1830f6
Show file tree

Hide file tree

Showing 6 changed files with 79 additions and 67 deletions.
diff --git a/src/bin/cargo/commands/remove.rs b/src/bin/cargo/commands/remove.rs
@@ -286,7 286,7 @@ fn spec_has_match(
  config: &Config,
 ) -> CargoResult<bool> {
  for dep in dependencies {
- if spec.name().as_str() != &dep.name {
  if spec.name() != &dep.name {
  continue;
  }
 

diff --git a/src/cargo/core/package_id_spec.rs b/src/cargo/core/package_id_spec.rs
@@ -9,7 9,6 @@ use url::Url;
 use crate::core::PackageId;
 use crate::util::edit_distance;
 use crate::util::errors::CargoResult;
-use crate::util::interning::InternedString;
 use crate::util::PartialVersion;
 use crate::util::{validate_package_name, IntoUrl};
 
@@ -24,7 23,7 @@ use crate::util::{validate_package_name, IntoUrl};
 /// sufficient to uniquely define a package ID.
 #[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)]
 pub struct PackageIdSpec {
- name: InternedString,
  name: String,
  version: Option<PartialVersion>,
  url: Option<Url>,
 }
@@ -76,7 75,7 @@ impl PackageIdSpec {
  };
  validate_package_name(name, "pkgid", "")?;
  Ok(PackageIdSpec {
- name: InternedString::new(name),
  name: String::from(name),
  version,
  url: None,
  })
@@ -99,7 98,7 @@ impl PackageIdSpec {
  /// fields filled in.
  pub fn from_package_id(package_id: PackageId) -> PackageIdSpec {
  PackageIdSpec {
- name: package_id.name(),
  name: String::from(package_id.name().as_str()),
  version: Some(package_id.version().clone().into()),
  url: Some(package_id.source_id().url().clone()),
  }
@@ -127,18 126,18 @@ impl PackageIdSpec {
  Some(fragment) => match fragment.split_once([':', '@']) {
  Some((name, part)) => {
  let version = part.parse::<PartialVersion>()?;
- (InternedString::new(name), Some(version))
  (String::from(name), Some(version))
  }
  None => {
  if fragment.chars().next().unwrap().is_alphabetic() {
- (InternedString::new(&fragment), None)
  (String::from(fragment.as_str()), None)
  } else {
  let version = fragment.parse::<PartialVersion>()?;
- (InternedString::new(path_name), Some(version))
  (String::from(path_name), Some(version))
  }
  }
  },
- None => (InternedString::new(path_name), None),
  None => (String::from(path_name), None),
  }
  };
  Ok(PackageIdSpec {
@@ -148,8 147,8 @@ impl PackageIdSpec {
  })
  }
 
- pub fn name(&self) -> InternedString {
- self.name
  pub fn name(&self) -> &str {
  self.name.as_str()
  }
 
  /// Full `semver::Version`, if present
@@ -171,7 170,7 @@ impl PackageIdSpec {
 
  /// Checks whether the given `PackageId` matches the `PackageIdSpec`.
  pub fn matches(&self, package_id: PackageId) -> bool {
- if self.name() != package_id.name() {
  if self.name() != package_id.name().as_str() {
  return false;
  }
 
@@ -211,7 210,7 @@ impl PackageIdSpec {
  if self.url.is_some() {
  try_spec(
  PackageIdSpec {
- name: self.name,
  name: self.name.clone(),
  version: self.version.clone(),
  url: None,
  },
@@ -221,7 220,7 @@ impl PackageIdSpec {
  if suggestion.is_empty() && self.version.is_some() {
  try_spec(
  PackageIdSpec {
- name: self.name,
  name: self.name.clone(),
  version: None,
  url: None,
  },
@@ -324,7 323,6 @@ impl<'de> de::Deserialize<'de> for PackageIdSpec {
 mod tests {
  use super::PackageIdSpec;
  use crate::core::{PackageId, SourceId};
- use crate::util::interning::InternedString;
  use url::Url;
 
  #[test]
@@ -339,7 337,7 @@ mod tests {
  ok(
  "https://crates.io/foo",
  PackageIdSpec {
- name: InternedString::new("foo"),
  name: String::from("foo"),
  version: None,
  url: Some(Url::parse("https://crates.io/foo").unwrap()),
  },
@@ -348,7 346,7 @@ mod tests {
  ok(
  "https://crates.io/foo#1.2.3",
  PackageIdSpec {
- name: InternedString::new("foo"),
  name: String::from("foo"),
  version: Some("1.2.3".parse().unwrap()),
  url: Some(Url::parse("https://crates.io/foo").unwrap()),
  },
@@ -357,7 355,7 @@ mod tests {
  ok(
  "https://crates.io/foo#1.2",
  PackageIdSpec {
- name: InternedString::new("foo"),
  name: String::from("foo"),
  version: Some("1.2".parse().unwrap()),
  url: Some(Url::parse("https://crates.io/foo").unwrap()),
  },
@@ -366,7 364,7 @@ mod tests {
  ok(
  "https://crates.io/foo#bar:1.2.3",
  PackageIdSpec {
- name: InternedString::new("bar"),
  name: String::from("bar"),
  version: Some("1.2.3".parse().unwrap()),
  url: Some(Url::parse("https://crates.io/foo").unwrap()),
  },
@@ -375,7 373,7 @@ mod tests {
  ok(
  "https://crates.io/foo#[email protected]",
  PackageIdSpec {
- name: InternedString::new("bar"),
  name: String::from("bar"),
  version: Some("1.2.3".parse().unwrap()),
  url: Some(Url::parse("https://crates.io/foo").unwrap()),
  },
@@ -384,7 382,7 @@ mod tests {
  ok(
  "https://crates.io/foo#[email protected]",
  PackageIdSpec {
- name: InternedString::new("bar"),
  name: String::from("bar"),
  version: Some("1.2".parse().unwrap()),
  url: Some(Url::parse("https://crates.io/foo").unwrap()),
  },
@@ -393,7 391,7 @@ mod tests {
  ok(
  "foo",
  PackageIdSpec {
- name: InternedString::new("foo"),
  name: String::from("foo"),
  version: None,
  url: None,
  },
@@ -402,7 400,7 @@ mod tests {
  ok(
  "foo:1.2.3",
  PackageIdSpec {
- name: InternedString::new("foo"),
  name: String::from("foo"),
  version: Some("1.2.3".parse().unwrap()),
  url: None,
  },
@@ -411,7 409,7 @@ mod tests {
  ok(
  "[email protected]",
  PackageIdSpec {
- name: InternedString::new("foo"),
  name: String::from("foo"),
  version: Some("1.2.3".parse().unwrap()),
  url: None,
  },
@@ -420,7 418,7 @@ mod tests {
  ok(
  "[email protected]",
  PackageIdSpec {
- name: InternedString::new("foo"),
  name: String::from("foo"),
  version: Some("1.2".parse().unwrap()),
  url: None,
  },

diff --git a/src/cargo/core/profiles.rs b/src/cargo/core/profiles.rs
@@ -104,7 104,7 @@ impl Profiles {
  // Verify that the requested profile is defined *somewhere*.
  // This simplifies the API (no need for CargoResult), and enforces
  // assumptions about how config profiles are loaded.
- profile_makers.get_profile_maker(requested_profile)?;
  profile_makers.get_profile_maker(&requested_profile)?;
  Ok(profile_makers)
  }
 
@@ -142,21 142,21 @@ impl Profiles {
  (
  "bench",
  TomlProfile {
- inherits: Some(InternedString::new("release")),
  inherits: Some(String::from("release")),
  ..TomlProfile::default()
  },
  ),
  (
  "test",
  TomlProfile {
- inherits: Some(InternedString::new("dev")),
  inherits: Some(String::from("dev")),
  ..TomlProfile::default()
  },
  ),
  (
  "doc",
  TomlProfile {
- inherits: Some(InternedString::new("dev")),
  inherits: Some(String::from("dev")),
  ..TomlProfile::default()
  },
  ),
@@ -173,7 173,7 @@ impl Profiles {
  match &profile.dir_name {
  None => {}
  Some(dir_name) => {
- self.dir_names.insert(name, dir_name.to_owned());
  self.dir_names.insert(name, InternedString::new(dir_name));
  }
  }
 
@@ -212,12 212,13 @@ impl Profiles {
  set: &mut HashSet<InternedString>,
  profiles: &BTreeMap<InternedString, TomlProfile>,
  ) -> CargoResult<ProfileMaker> {
- let mut maker = match profile.inherits {
  let mut maker = match &profile.inherits {
  Some(inherits_name) if inherits_name == "dev" || inherits_name == "release" => {
  // These are the root profiles added in `add_root_profiles`.
- self.get_profile_maker(inherits_name).unwrap().clone()
  self.get_profile_maker(&inherits_name).unwrap().clone()
  }
  Some(inherits_name) => {
  let inherits_name = InternedString::new(&inherits_name);
  if !set.insert(inherits_name) {
  bail!(
  "profile inheritance loop detected with profile `{}` inheriting `{}`",
@@ -263,7 264,7 @@ impl Profiles {
  unit_for: UnitFor,
  kind: CompileKind,
  ) -> Profile {
- let maker = self.get_profile_maker(self.requested_profile).unwrap();
  let maker = self.get_profile_maker(&self.requested_profile).unwrap();
  let mut profile = maker.get_profile(Some(pkg_id), is_member, unit_for.is_for_host());
 
  // Dealing with `panic=abort` and `panic=unwind` requires some special
@@ -325,7 326,7 @@ impl Profiles {
  /// select for the package that was actually built.
  pub fn base_profile(&self) -> Profile {
  let profile_name = self.requested_profile;
- let maker = self.get_profile_maker(profile_name).unwrap();
  let maker = self.get_profile_maker(&profile_name).unwrap();
  maker.get_profile(None, /*is_member*/ true, /*is_for_host*/ false)
  }
 
@@ -372,9 373,9 @@ impl Profiles {
  }
 
  /// Returns the profile maker for the given profile name.
- fn get_profile_maker(&self, name: InternedString) -> CargoResult<&ProfileMaker> {
  fn get_profile_maker(&self, name: &str) -> CargoResult<&ProfileMaker> {
  self.by_name
- .get(&name)
  .get(name)
  .ok_or_else(|| anyhow::format_err!("profile `{}` is not defined", name))
  }
 }
@@ -521,7 522,7 @@ fn merge_profile(profile: &mut Profile, toml: &TomlProfile) {
  None => {}
  }
  if toml.codegen_backend.is_some() {
- profile.codegen_backend = toml.codegen_backend;
  profile.codegen_backend = toml.codegen_backend.as_ref().map(InternedString::from);
  }
  if toml.codegen_units.is_some() {
  profile.codegen_units = toml.codegen_units;
@@ -553,7 554,7 @@ fn merge_profile(profile: &mut Profile, toml: &TomlProfile) {
  profile.incremental = incremental;
  }
  if let Some(flags) = &toml.rustflags {
- profile.rustflags = flags.clone();
  profile.rustflags = flags.iter().map(InternedString::from).collect();
  }
  profile.strip = match toml.strip {
  Some(StringOrBool::Bool(true)) => Strip::Named(InternedString::new("symbols")),
@@ -1162,7 1163,11 @@ fn merge_config_profiles(
  requested_profile: InternedString,
 ) -> CargoResult<BTreeMap<InternedString, TomlProfile>> {
  let mut profiles = match ws.profiles() {
- Some(profiles) => profiles.get_all().clone(),
  Some(profiles) => profiles
  .get_all()
  .iter()
  .map(|(k, v)| (InternedString::new(k), v.clone()))
  .collect(),
  None => BTreeMap::new(),
  };
  // Set of profile names to check if defined in config only.
@@ -1174,7 1179,7 @@ fn merge_config_profiles(
  profile.merge(&config_profile);
  }
  if let Some(inherits) = &profile.inherits {
- check_to_add.insert(*inherits);
  check_to_add.insert(InternedString::new(inherits));
  }
  }
  // Add the built-in profiles. This is important for things like `cargo
@@ -1188,10 1193,10 @@ fn merge_config_profiles(
  while !check_to_add.is_empty() {
  std::mem::swap(&mut current, &mut check_to_add);
  for name in current.drain() {
- if !profiles.contains_key(&name) {
  if !profiles.contains_key(name.as_str()) {
  if let Some(config_profile) = get_config_profile(ws, &name)? {
  if let Some(inherits) = &config_profile.inherits {
- check_to_add.insert(*inherits);
  check_to_add.insert(InternedString::new(inherits));
  }
  profiles.insert(name, config_profile);
  }

diff --git a/src/cargo/core/workspace.rs b/src/cargo/core/workspace.rs
@@ -1491,7 1491,7 @@ impl<'cfg> Workspace<'cfg> {
  // Check if `dep_name` is member of the workspace, but isn't associated with current package.
  self.current_opt() != Some(member) && member.name() == *dep_name
  });
- if is_member && specs.iter().any(|spec| spec.name() == *dep_name) {
  if is_member && specs.iter().any(|spec| spec.name() == dep_name.as_str()) {
  member_specific_features
  .entry(*dep_name)
  .or_default()