Feat: V2.2 Support configuration of hash function for source checksums #181

Merged
merged 15 commits into from
May 15, 2024
Remove crc32 and xxhash, keep xxhash3
arnauorriols committed May 15, 2024
commit 1890e5b8ca3c180eaca2a1cf1b703f932a39d819
11 changes: 0 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default.

8 changes: 2 additions & 6 deletions Cargo.toml
@@ -37,9 +37,7 @@ name = "source_hash_function"
harness = false

[features]
crc32 = ["dep:crc32fast"]
xxhash = ["dep:twox-hash"]
xxhash3 = ["dep:xxhash-rust"]
xxhash3 = ["xxhash-rust/xxh3"]
sha256 = ["dep:sha2"]
# backwards compatibility. Disabling sha256 will break compatibility with eszips older than v2.2
default = ["sha256"]
@@ -58,9 +56,7 @@ serde_json = "1"
sha2 = {version = "0.10.1", optional = true}
thiserror = "1.0.30"
url = "2.2.2"
crc32fast = {version = "1", optional = true}
xxhash-rust = { version = "0.8", optional = true, features = ["xxh3"] }
twox-hash = { version = "1.6", optional = true, default-features = false }
xxhash-rust = { version = "0.8", optional = true }

[dev-dependencies]
import_map = { workspace = true }
68 changes: 0 additions & 68 deletions benches/source_hash_function.rs
@@ -14,18 +14,6 @@ fn into_bytes_sha256(mut eszip: EszipV2) -> Vec<u8> {
eszip.into_bytes()
}

#[cfg(feature = "crc32")]
fn into_bytes_crc32(mut eszip: EszipV2) -> Vec<u8> {
eszip.set_checksum(Checksum::Crc32);
eszip.into_bytes()
}

#[cfg(feature = "xxhash")]
fn into_bytes_xxhash(mut eszip: EszipV2) -> Vec<u8> {
eszip.set_checksum(Checksum::XxHash);
eszip.into_bytes()
}

#[cfg(feature = "xxhash3")]
fn into_bytes_xxhash3(mut eszip: EszipV2) -> Vec<u8> {
eszip.set_checksum(Checksum::XxHash3);
@@ -67,40 +55,6 @@ fn bench_into_bytes(c: &mut Criterion) {
)
},
);
#[cfg(feature = "crc32")]
group.bench_with_input(
BenchmarkId::new("CRC32", format!("{mb}MB")),
&mb,
|b, mb| {
b.iter_batched(
|| {
let rt = tokio::runtime::Builder::new_current_thread()
.build()
.unwrap();
rt.block_on(build_eszip(*mb))
},
into_bytes_crc32,
criterion::BatchSize::SmallInput,
)
},
);
#[cfg(feature = "xxhash")]
group.bench_with_input(
BenchmarkId::new("XXHASH", format!("{mb}MB")),
&mb,
|b, mb| {
b.iter_batched(
|| {
let rt = tokio::runtime::Builder::new_current_thread()
.build()
.unwrap();
rt.block_on(build_eszip(*mb))
},
into_bytes_xxhash,
criterion::BatchSize::SmallInput,
)
},
);
#[cfg(feature = "xxhash3")]
group.bench_with_input(
BenchmarkId::new("XXHASH3", format!("{mb}MB")),
@@ -158,28 +112,6 @@ fn bench_parse(c: &mut Criterion) {
|b, bytes| b.to_async(&rt).iter(|| parse(bytes)),
);
}
#[cfg(feature = "crc32")]
{
let mut eszip = rt.block_on(build_eszip(mb));
eszip.set_checksum(Checksum::Crc32);
let bytes = eszip.into_bytes();
group.bench_with_input(
BenchmarkId::new("CRC32", format!("{mb}MB")),
&bytes,
|b, bytes| b.to_async(&rt).iter(|| parse(bytes)),
);
}
#[cfg(feature = "xxhash")]
{
let mut eszip = rt.block_on(build_eszip(mb));
eszip.set_checksum(Checksum::XxHash);
let bytes = eszip.into_bytes();
group.bench_with_input(
BenchmarkId::new("XXHASH", format!("{mb}MB")),
&bytes,
|b, bytes| b.to_async(&rt).iter(|| parse(bytes)),
);
}
#[cfg(feature = "xxhash3")]
{
let mut eszip = rt.block_on(build_eszip(mb));
98 changes: 20 additions & 78 deletions src/v2.rs
arnauorriols marked this conversation as resolved.
@@ -4,8 +4,6 @@ use std::collections::HashMap;
use std::collections::HashSet;
use std::future::Future;
use std::hash::Hash;
#[cfg(feature = "xxhash")]
use std::hash::Hasher;
use std::mem::size_of;
use std::sync::Arc;
use std::sync::Mutex;
@@ -28,13 +26,7 @@ use deno_semver::package::PackageReq;
use futures::future::poll_fn;
use futures::io::AsyncReadExt;
use hashlink::linked_hash_map::LinkedHashMap;
#[cfg(feature = "xxhash")]
use twox_hash::XxHash64;
pub use url::Url;
#[cfg(feature = "xxhash3")]
use xxhash_rust::xxh3::xxh3_64;
#[cfg(feature = "sha256")]
use {sha2::Digest, sha2::Sha256};

use crate::error::ParseError;
use crate::Module;
@@ -238,12 +230,8 @@ pub enum Checksum {
NoChecksum = 0,
Member: Also, thanks for adding this! It should speed up running code using deno compile.

#[cfg(feature = "sha256")]
Member: Perhaps we should always compile all the enum variants and instead error when a cargo feature is not enabled while serializing or deserializing a variant whose feature is disabled? For example, when reading an eszip that uses crc32 we could error, saying to enable the crc32 cargo feature in order to read that eszip.
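A minimal sketch of what that alternative could look like (the try_from_u8 helper, error type, and messages are hypothetical, not the API proposed in this PR): every variant stays compiled, and reading a discriminant whose cargo feature is disabled surfaces a descriptive error instead of a silent fallback.

// Hypothetical sketch of the suggestion above; names and error type are illustrative only.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum Checksum {
  NoChecksum = 0,
  Sha256 = 1,
  XxHash3 = 2,
}

impl Checksum {
  fn try_from_u8(discriminant: u8) -> Result<Self, String> {
    match discriminant {
      0 => Ok(Self::NoChecksum),
      #[cfg(feature = "sha256")]
      1 => Ok(Self::Sha256),
      #[cfg(not(feature = "sha256"))]
      1 => Err("enable the `sha256` cargo feature to read this eszip".to_string()),
      #[cfg(feature = "xxhash3")]
      2 => Ok(Self::XxHash3),
      #[cfg(not(feature = "xxhash3"))]
      2 => Err("enable the `xxhash3` cargo feature to read this eszip".to_string()),
      // Unknown discriminants would also error instead of degrading silently.
      other => Err(format!("unknown checksum function: {other}")),
    }
  }
}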

Sha256 = 1,
#[cfg(feature = "crc32")]
Crc32 = 2,
#[cfg(feature = "xxhash")]
XxHash = 3,
#[cfg(feature = "xxhash3")]
XxHash3 = 4,
XxHash3 = 2,
}

impl Checksum {
@@ -252,10 +240,6 @@ impl Checksum {
Self::NoChecksum => 0,
#[cfg(feature = "sha256")]
Self::Sha256 => 32,
#[cfg(feature = "crc32")]
Self::Crc32 => 4,
#[cfg(feature = "xxhash")]
Self::XxHash => 8,
#[cfg(feature = "xxhash3")]
Self::XxHash3 => 8,
}
@@ -266,42 +250,27 @@ impl Checksum {
0 => Self::NoChecksum,
#[cfg(feature = "sha256")]
1 => Self::Sha256,
#[cfg(feature = "crc32")]
2 => Self::Crc32,
#[cfg(feature = "xxhash")]
3 => Self::XxHash,
#[cfg(feature = "xxhash3")]
4 => Self::XxHash3,
2 => Self::XxHash3,
_ => return None,
Member: Perhaps this should error when it finds something it doesn't recognize? Otherwise it might lead to corrupt data being loaded, I think.

Member Author: The idea is to have a forward-compatible design that falls back to no checksumming when encountering an unknown hash function. This applies both to decoding an eszip built with a newer version of the library and to one built with a hash function that is not enabled in your build.

This does not lead to corrupt data, because the checksum size is explicit in the options header: even if the library does not know how to validate the checksum, it still knows how long it is. Users that rely on this guarantee are expected to use EszipV2::is_checksumed() and EszipV2::should_be_checksumed() to ensure the content has been checksummed (a usage sketch follows the referenced snippet below).

See eszip/src/v2.rs, Lines 2444 to 2482 in fa49cb5:

#[cfg(feature = "sha256")]
#[tokio::test]
#[should_panic]
async fn v2_2_unknown_checksum_function_degrades_to_no_checksum() {
// checksum 255; checksum_size 32
let option_bytes = &[0, 255, 1, 32];
let futuristic_options = [
4_u32.to_be_bytes().as_slice(),
option_bytes,
&<sha2::Sha256 as sha2::Digest>::digest(option_bytes).as_slice(),
]
.concat();
let mut eszip = main_eszip().await;
// Using sha256/32Bytes as mock hash.
eszip.set_checksum(Checksum::Sha256);
let bytes = eszip.into_bytes();
let existing_options_size = std::mem::size_of::<u32>()
  + std::mem::size_of::<u8>() * 4
  + <sha2::Sha256 as sha2::Digest>::output_size();
let options_start = ESZIP_V2_2_MAGIC.len();
let new_bytes = [
&bytes[..options_start],
futuristic_options.as_slice(),
&bytes[options_start + existing_options_size..],
]
.concat();
let (new_eszip, fut) = EszipV2::parse(BufReader::new(new_bytes.as_slice()))
.await
.unwrap();
fut.await.unwrap();
assert_eq!(new_eszip.options.checksum, None);
assert_eq!(new_eszip.options.checksum_size(), Some(32));
assert!(!new_eszip.is_checksumed());
assert!(new_eszip.should_be_checksumed());
// This should panic, as cannot re-encode without setting an explicit checksum configuration
new_eszip.into_bytes();
}
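For illustration (not part of the diff), a rough usage sketch of that guard in a consumer. It assumes the API exercised in the snippet above (EszipV2::parse, is_checksumed, should_be_checksumed), that EszipV2 is in scope as in the test module, and that BufReader is the futures::io::BufReader the tests use; the function name is hypothetical.

// Sketch: parse bytes and refuse to proceed when the eszip declares a
// checksum that this build cannot verify (unknown or feature-disabled hash).
use futures::io::BufReader;

async fn parse_requiring_checksum(bytes: Vec<u8>) -> EszipV2 {
  let (eszip, fut) = EszipV2::parse(BufReader::new(bytes.as_slice()))
    .await
    .unwrap();
  fut.await.unwrap();
  if eszip.should_be_checksumed() && !eszip.is_checksumed() {
    panic!("eszip declares a checksum this build cannot verify");
  }
  eszip
}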

})
}
fn hash(
self,
#[cfg_attr(
not(any(
feature = "sha256",
feature = "crc32",
feature = "xxhash",
feature = "xxhash3"
)),
not(any(feature = "sha256", feature = "xxhash3")),
allow(unused)
)]
bytes: &[u8],
) -> Vec<u8> {
match self {
Self::NoChecksum => Vec::new(),
#[cfg(feature = "sha256")]
Self::Sha256 => Sha256::digest(bytes).as_slice().to_vec(),
#[cfg(feature = "crc32")]
Self::Crc32 => crc32fast::hash(bytes).to_be_bytes().into(),
#[cfg(feature = "xxhash")]
Self::XxHash => {
let mut hasher = XxHash64::default();
hasher.write(bytes);
hasher.finish().to_be_bytes().into()
}
Self::Sha256 => <sha2::Sha256 as sha2::Digest>::digest(bytes)
.as_slice()
.to_vec(),
#[cfg(feature = "xxhash3")]
Self::XxHash3 => xxh3_64(bytes).to_be_bytes().into(),
Self::XxHash3 => xxhash_rust::xxh3::xxh3_64(bytes).to_be_bytes().into(),
}
}
}
@@ -1440,8 +1409,6 @@ mod tests {
use import_map::ImportMap;
use pretty_assertions::assert_eq;
use url::Url;
#[cfg(feature = "xxhash3")]
use xxhash_rust::xxh3::xxh3_64;

use super::Checksum;
use super::EszipV2;
@@ -2364,34 +2331,9 @@
assert!(eszip.is_checksumed());
}

#[cfg(feature = "crc32")]
#[tokio::test]
async fn v2_2_set_crc32_checksum() {
let mut eszip = main_eszip().await;
eszip.set_checksum(super::Checksum::Crc32);
let main_source = eszip
.get_module("file:///main.ts")
.unwrap()
.source()
.await
.unwrap();
let bytes = eszip.into_bytes();
let main_crc32 = crc32fast::hash(&main_source).to_be_bytes();
let crc32_in_bytes = bytes
.windows(main_crc32.len())
.any(|window| window == main_crc32);
assert!(crc32_in_bytes);
let (parsed_eszip, fut) = EszipV2::parse(BufReader::new(bytes.as_slice()))
.await
.unwrap();
fut.await.unwrap();
assert_eq!(parsed_eszip.options.checksum, Some(super::Checksum::Crc32));
assert!(parsed_eszip.is_checksumed());
}

#[cfg(feature = "xxhash")]
#[cfg(feature = "xxhash3")]
#[tokio::test]
async fn v2_2_set_xxhash_checksum() {
async fn v2_2_set_xxhash3_checksum() {
let mut eszip = main_eszip().await;
eszip.set_checksum(super::Checksum::XxHash3);
let main_source = eszip
@@ -2401,7 +2343,7 @@
.await
.unwrap();
let bytes = eszip.into_bytes();
let main_xxhash = xxh3_64(&main_source).to_be_bytes();
let main_xxhash = xxhash_rust::xxh3::xxh3_64(&main_source).to_be_bytes();
let xxhash_in_bytes = bytes
.windows(main_xxhash.len())
.any(|window| window == main_xxhash);
@@ -2481,28 +2423,28 @@ mod tests {
new_eszip.into_bytes();
}

#[cfg(feature = "crc32")]
#[cfg(feature = "sha256")]
#[tokio::test]
async fn wrong_checksum() {
let mut eszip = main_eszip().await;
eszip.set_checksum(Checksum::Crc32);
eszip.set_checksum(Checksum::Sha256);
let main_source = eszip
.get_module("file:///main.ts")
.unwrap()
.source()
.await
.unwrap();
let bytes = eszip.into_bytes();
let mut main_crc32 = crc32fast::hash(&main_source).to_be_bytes();
let crc32_in_bytes_start = bytes
.windows(main_crc32.len())
.position(|window| window == main_crc32)
let mut main_sha256 = <sha2::Sha256 as sha2::Digest>::digest(&main_source);
let sha256_in_bytes_start = bytes
.windows(main_sha256.len())
.position(|window| window == &*main_sha256)
.unwrap();
main_crc32.reverse();
main_sha256.reverse();
let bytes = [
&bytes[..crc32_in_bytes_start],
main_crc32.as_slice(),
&bytes[crc32_in_bytes_start + main_crc32.len()..],
&bytes[..sha256_in_bytes_start],
main_sha256.as_slice(),
&bytes[sha256_in_bytes_start + main_sha256.len()..],
]
.concat();
let (_eszip, fut) = EszipV2::parse(BufReader::new(bytes.as_slice()))