Skip to content

Commit

Permalink
Auto merge of #3973 - nipunn1313:readme2, r=Turbo87
Browse files Browse the repository at this point in the history
Support relative URLs from repo-subdirectory packages

Extract `.cargo_vcs_info.json` from the tarball and use it to determine the path in vcs.

Support for this was added to cargo with
rust-lang/cargo#9866

Fixes #3484
(note that support for this in admin::render_readmes not yet exists - would come with #4095)

Depends on #4097 (Because of community/community#4477 - github ends up rendering the diffs together. Go to the commits tab and just look at the most recent commits to review).
  • Loading branch information
bors committed Jan 3, 2022
2 parents b07754a 86d852f commit 3f67acf
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 21 deletions.
37 changes: 25 additions & 12 deletions cargo-registry-markdown/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,18 245,23 @@ static MARKDOWN_EXTENSIONS: [&str; 7] =
/// use cargo_registry_markdown::text_to_html;
///
/// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!";
/// let rendered = text_to_html(text, "README.md", None);
/// let rendered = text_to_html(text, "README.md", None, None);
/// assert_eq!(rendered, "<p><a href=\"https://rust-lang.org/\" rel=\"nofollow noopener noreferrer\">Rust</a> is an awesome <em>systems programming</em> language!</p>\n");
/// ```
pub fn text_to_html(text: &str, path: &str, base_url: Option<&str>) -> String {
let path = Path::new(path);
let base_dir = path.parent().and_then(|p| p.to_str()).unwrap_or("");

if path.extension().is_none() {
pub fn text_to_html(
text: &str,
readme_path_in_pkg: &str,
base_url: Option<&str>,
pkg_path_in_vcs: Option<&str>,
) -> String {
let path_in_vcs = Path::new(pkg_path_in_vcs.unwrap_or("")).join(readme_path_in_pkg);
let base_dir = path_in_vcs.parent().and_then(|p| p.to_str()).unwrap_or("");

if path_in_vcs.extension().is_none() {
return markdown_to_html(text, base_url, base_dir);
}

if let Some(ext) = path.extension().and_then(|ext| ext.to_str()) {
if let Some(ext) = path_in_vcs.extension().and_then(|ext| ext.to_str()) {
if MARKDOWN_EXTENSIONS.contains(&ext.to_lowercase().as_str()) {
return markdown_to_html(text, base_url, base_dir);
}
Expand Down Expand Up @@ -477,30 482,38 @@ mod tests {
"s1/s2/readme.md",
] {
assert_eq!(
text_to_html("*lobster*", f, None),
text_to_html("*lobster*", f, None, None),
"<p><em>lobster</em></p>\n"
);
}

assert_eq!(
text_to_html("*[lobster](docs/lobster)*", "readme.md", Some("https://github.com/rust-lang/test")),
text_to_html("*[lobster](docs/lobster)*", "readme.md", Some("https://github.com/rust-lang/test"), None),
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
);
assert_eq!(
text_to_html("*[lobster](docs/lobster)*", "s/readme.md", Some("https://github.com/rust-lang/test")),
text_to_html("*[lobster](docs/lobster)*", "s/readme.md", Some("https://github.com/rust-lang/test"), None),
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/s/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
);
assert_eq!(
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test")),
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test"), None),
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/s1/s2/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
);
assert_eq!(
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test"), Some("path/in/vcs/")),
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/path/in/vcs/s1/s2/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
);
assert_eq!(
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test"), Some("path/in/vcs")),
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/path/in/vcs/s1/s2/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
);
}

#[test]
fn text_to_html_renders_other_things() {
for f in &["readme.exe", "readem.org", "blah.adoc"] {
assert_eq!(
text_to_html("<script>lobster</script>\n\nis my friend\n", f, None),
text_to_html("<script>lobster</script>\n\nis my friend\n", f, None, None),
"&lt;script&gt;lobster&lt;/script&gt;<br>\n<br>\nis my friend<br>\n"
);
}
Expand Down
5 changes: 5 additions & 0 deletions src/admin/render_readmes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 205,15 @@ fn render_pkg_readme<R: Read>(mut archive: Archive<R>, pkg_name: &str) -> anyhow
let contents = find_file_by_path(&mut entries, Path::new(&path))
.with_context(|| format!("Failed to read {} file", readme_path))?;

// pkg_path_in_vcs Unsupported from admin::render_readmes. See #4095
// Would need access to cargo_vcs_info
let pkg_path_in_vcs = None;

text_to_html(
&contents,
&readme_path,
manifest.package.repository.as_deref(),
pkg_path_in_vcs,
)
};
return Ok(rendered);
Expand Down
76 changes: 68 additions & 8 deletions src/controllers/krate/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 5,7 @@ use hex::ToHex;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::io::Read;
use std::path::Path;
use std::sync::Arc;
use swirl::Job;

Expand All @@ -18,7 19,7 @@ use crate::worker;

use crate::schema::*;
use crate::util::errors::{cargo_err, AppResult};
use crate::util::{read_fill, read_le_u32, LimitErrorReader, Maximums};
use crate::util::{read_fill, read_le_u32, CargoVcsInfo, LimitErrorReader, Maximums};
use crate::views::{
EncodableCrate, EncodableCrateDependency, EncodableCrateUpload, GoodCrate, PublishWarnings,
};
Expand Down Expand Up @@ -195,7 196,8 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
LimitErrorReader::new(req.body(), maximums.max_upload_size).read_to_end(&mut tarball)?;
let hex_cksum: String = Sha256::digest(&tarball).encode_hex();
let pkg_name = format!("{}-{}", krate.name, vers);
verify_tarball(&pkg_name, &tarball, maximums.max_unpack_size)?;
let cargo_vcs_info = verify_tarball(&pkg_name, &tarball, maximums.max_unpack_size)?;
let pkg_path_in_vcs = cargo_vcs_info.map(|info| info.path_in_vcs);

if let Some(readme) = new_crate.readme {
worker::render_and_upload_readme(
Expand All @@ -205,6 207,7 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
.readme_file
.unwrap_or_else(|| String::from("README.md")),
repo,
pkg_path_in_vcs,
)
.enqueue(&conn)?;
}
Expand Down Expand Up @@ -379,7 382,11 @@ pub fn add_dependencies(
Ok(git_deps)
}

fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<()> {
fn verify_tarball(
pkg_name: &str,
tarball: &[u8],
max_unpack: u64,
) -> AppResult<Option<CargoVcsInfo>> {
// All our data is currently encoded with gzip
let decoder = GzDecoder::new(tarball);

Expand All @@ -389,8 396,12 @@ fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<

// Use this I/O object now to take a peek inside
let mut archive = tar::Archive::new(decoder);

let vcs_info_path = Path::new(&pkg_name).join(".cargo_vcs_info.json");
let mut vcs_info = None;

for entry in archive.entries()? {
let entry = entry.map_err(|err| {
let mut entry = entry.map_err(|err| {
err.chain(cargo_err(
"uploaded tarball is malformed or too large when decompressed",
))
Expand All @@ -401,9 412,15 @@ fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<
// upload a tarball that contains both `foo-0.1.0/` source code as well
// as `bar-0.1.0/` source code, and this could overwrite other crates in
// the registry!
if !entry.path()?.starts_with(&pkg_name) {
let entry_path = entry.path()?;
if !entry_path.starts_with(&pkg_name) {
return Err(cargo_err("invalid tarball uploaded"));
}
if entry_path == vcs_info_path {
let mut contents = String::new();
entry.read_to_string(&mut contents)?;
vcs_info = CargoVcsInfo::from_contents(&contents).ok();
}

// Historical versions of the `tar` crate which Cargo uses internally
// don't properly prevent hard links and symlinks from overwriting
Expand All @@ -415,7 432,7 @@ fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<
return Err(cargo_err("invalid tarball uploaded"));
}
}
Ok(())
Ok(vcs_info)
}

#[cfg(test)]
Expand All @@ -435,14 452,57 @@ mod tests {
#[test]
fn verify_tarball_test() {
let mut pkg = tar::Builder::new(vec![]);
add_file(&mut pkg, "foo-0.0.1/.cargo_vcs_info.json", br#"{}"#);
add_file(&mut pkg, "foo-0.0.1/Cargo.toml", b"");
let mut serialized_archive = vec![];
GzEncoder::new(pkg.into_inner().unwrap().as_slice(), Default::default())
.read_to_end(&mut serialized_archive)
.unwrap();

let limit = 512 * 1024 * 1024;
assert_ok!(verify_tarball("foo-0.0.1", &serialized_archive, limit));
assert_eq!(
verify_tarball("foo-0.0.1", &serialized_archive, limit).unwrap(),
None
);
assert_err!(verify_tarball("bar-0.0.1", &serialized_archive, limit));
}

#[test]
fn verify_tarball_test_incomplete_vcs_info() {
let mut pkg = tar::Builder::new(vec![]);
add_file(&mut pkg, "foo-0.0.1/Cargo.toml", b"");
add_file(
&mut pkg,
"foo-0.0.1/.cargo_vcs_info.json",
br#"{"unknown": "field"}"#,
);
let mut serialized_archive = vec![];
GzEncoder::new(pkg.into_inner().unwrap().as_slice(), Default::default())
.read_to_end(&mut serialized_archive)
.unwrap();
let limit = 512 * 1024 * 1024;
let vcs_info = verify_tarball("foo-0.0.1", &serialized_archive, limit)
.unwrap()
.unwrap();
assert_eq!(vcs_info.path_in_vcs, "");
}

#[test]
fn verify_tarball_test_vcs_info() {
let mut pkg = tar::Builder::new(vec![]);
add_file(&mut pkg, "foo-0.0.1/Cargo.toml", b"");
add_file(
&mut pkg,
"foo-0.0.1/.cargo_vcs_info.json",
br#"{"path_in_vcs": "path/in/vcs"}"#,
);
let mut serialized_archive = vec![];
GzEncoder::new(pkg.into_inner().unwrap().as_slice(), Default::default())
.read_to_end(&mut serialized_archive)
.unwrap();
let limit = 512 * 1024 * 1024;
let vcs_info = verify_tarball("foo-0.0.1", &serialized_archive, limit)
.unwrap()
.unwrap();
assert_eq!(vcs_info.path_in_vcs, "path/in/vcs");
}
}
43 changes: 43 additions & 0 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 53,46 @@ impl Maximums {
}
}
}

/// Represents relevant contents of .cargo_vcs_info.json file when uploaded from cargo
/// or downloaded from crates.io
#[derive(Debug, Deserialize, Eq, PartialEq)]
pub struct CargoVcsInfo {
/// Path to the package within repo (empty string if root). / not \
#[serde(default)]
pub path_in_vcs: String,
}

impl CargoVcsInfo {
pub fn from_contents(contents: &str) -> serde_json::Result<Self> {
serde_json::from_str(contents)
}
}

#[cfg(test)]
mod tests {
use super::CargoVcsInfo;

#[test]
fn test_cargo_vcs_info() {
assert_eq!(CargoVcsInfo::from_contents("").ok(), None);
assert_eq!(
CargoVcsInfo::from_contents("{}").unwrap(),
CargoVcsInfo {
path_in_vcs: "".into()
}
);
assert_eq!(
CargoVcsInfo::from_contents(r#"{"path_in_vcs": "hi"}"#).unwrap(),
CargoVcsInfo {
path_in_vcs: "hi".into()
}
);
assert_eq!(
CargoVcsInfo::from_contents(r#"{"path_in_vcs": "hi", "future": "field"}"#).unwrap(),
CargoVcsInfo {
path_in_vcs: "hi".into()
}
);
}
}
8 changes: 7 additions & 1 deletion src/worker/readmes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 14,17 @@ pub fn render_and_upload_readme(
text: String,
readme_path: String,
base_url: Option<String>,
pkg_path_in_vcs: Option<String>,
) -> Result<(), PerformError> {
use crate::schema::*;
use diesel::prelude::*;

let rendered = text_to_html(&text, &readme_path, base_url.as_deref());
let rendered = text_to_html(
&text,
&readme_path,
base_url.as_deref(),
pkg_path_in_vcs.as_deref(),
);

conn.transaction(|| {
Version::record_readme_rendering(version_id, conn)?;
Expand Down

0 comments on commit 3f67acf

Please sign in to comment.