Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't recover lifetimes/labels containing emojis as character literals #108031

Merged
merged 3 commits into from
Feb 18, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Don't recover lifetimes/labels containing emojis as character literals
Note that at the time of this commit, `unic-emoji-char` seems to have
data tables only up to Emoji 5.0 (the Unicode emoji data version, not
Unicode 5.0), and the emoji data has been updated several times since.

A newer emoji such as `🥺` will not be recognized as an emoji
but older emojis such as `🐱` will.
  • Loading branch information
jieyouxu committed Feb 14, 2023
commit 380fa264132ad481e73cbbf0f3a0feefd99a1d78
2 changes: 2 additions & 0 deletions compiler/rustc_errors/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,8 @@ pub enum StashKey {
/// When an invalid lifetime e.g. `'2` should be reinterpreted
/// as a char literal in the parser
LifetimeIsChar,
/// When an invalid lifetime e.g. `'🐱` contains emoji.
LifetimeContainsEmoji,
/// Maybe there was a typo where a comma was forgotten before
/// FRU syntax
MaybeFruTypo,
Expand Down
43 changes: 33 additions & 10 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ pub enum TokenKind {
Literal { kind: LiteralKind, suffix_start: u32 },

/// "'a"
Lifetime { starts_with_number: bool },
Lifetime { starts_with_number: bool, contains_emoji: bool },

// One-char tokens:
/// ";"
Expand Down Expand Up @@ -630,7 +630,13 @@ impl Cursor<'_> {
// If the first symbol is valid for identifier, it can be a lifetime.
// Also check if it's a number for a better error reporting (so '0 will
// be reported as invalid lifetime and not as unterminated char literal).
is_id_start(self.first()) || self.first().is_digit(10)
// We also have to account for potential `'🐱` emojis to avoid reporting
// it as an unterminated char literal.
is_id_start(self.first())
|| self.first().is_digit(10)
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
// 5.0, but Unicode is already newer than this.
|| unic_emoji_char::is_emoji(self.first())
};

if !can_be_a_lifetime {
Expand All @@ -643,16 +649,33 @@ impl Cursor<'_> {
return Literal { kind, suffix_start };
}

// Either a lifetime or a character literal with
// length greater than 1.
// Either a lifetime or a character literal.

let starts_with_number = self.first().is_digit(10);
let mut contains_emoji = false;

// Skip the literal contents.
// First symbol can be a number (which isn't a valid identifier start),
// so skip it without any checks.
self.bump();
self.eat_while(is_id_continue);
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
// 5.0, but Unicode is already newer than this.
if unic_emoji_char::is_emoji(self.first()) {
contains_emoji = true;
} else {
// Skip the literal contents.
// First symbol can be a number (which isn't a valid identifier start),
// so skip it without any checks.
self.bump();
}
self.eat_while(|c| {
if is_id_continue(c) {
true
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
// 5.0, but Unicode is already newer than this.
} else if unic_emoji_char::is_emoji(c) {
contains_emoji = true;
true
} else {
false
}
});

// Check if after skipping literal contents we've met a closing
// single quote (which means that user attempted to create a
Expand All @@ -662,7 +685,7 @@ impl Cursor<'_> {
let kind = Char { terminated: true };
Literal { kind, suffix_start: self.pos_within_token() }
} else {
Lifetime { starts_with_number }
Lifetime { starts_with_number, contains_emoji }
}
}

Expand Down
9 changes: 7 additions & 2 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,16 +200,21 @@ impl<'a> StringReader<'a> {
};
token::Literal(token::Lit { kind, symbol, suffix })
}
rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
rustc_lexer::TokenKind::Lifetime { starts_with_number, contains_emoji } => {
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let lifetime_name = self.str_from(start);
if starts_with_number {
let span = self.mk_sp(start, self.pos);
let mut diag = self.sess.struct_err("lifetimes cannot start with a number");
let mut diag = self.sess.struct_err("lifetimes or labels cannot start with a number");
diag.set_span(span);
diag.stash(span, StashKey::LifetimeIsChar);
} else if contains_emoji {
let span = self.mk_sp(start, self.pos);
let mut diag = self.sess.struct_err("lifetimes or labels cannot contain emojis");
diag.set_span(span);
diag.stash(span, StashKey::LifetimeContainsEmoji);
}
let ident = Symbol::intern(lifetime_name);
token::Lifetime(ident)
Expand Down
45 changes: 45 additions & 0 deletions tests/ui/lexer/issue-108019-bad-emoji-recovery.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#![allow(unused_labels)]

// FIXME(#108019): outdated Unicode table
// fn foo() {
// '🥺 loop {
// break
// }
// }

fn bar() {
'🐱 loop {
//~^ ERROR labeled expression must be followed by `:`
//~| ERROR lifetimes or labels cannot contain emojis
break
}
}

fn qux() {
'a🐱 loop {
//~^ ERROR labeled expression must be followed by `:`
//~| ERROR lifetimes or labels cannot contain emojis
break
}
}

fn quux() {
'1🐱 loop {
//~^ ERROR labeled expression must be followed by `:`
//~| ERROR lifetimes or labels cannot start with a number
break
}
}

fn x<'🐱>() -> &'🐱 () {
//~^ ERROR lifetimes or labels cannot contain emojis
//~| ERROR lifetimes or labels cannot contain emojis
&()
}

fn y() {
'a🐱: loop {}
//~^ ERROR lifetimes or labels cannot contain emojis
}

fn main() {}
86 changes: 86 additions & 0 deletions tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
error: labeled expression must be followed by `:`
--> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
|
LL | '🐱 loop {
| ^--- help: add `:` after the label
| |
| _____the label
| |
LL | |
LL | |
LL | | break
LL | | }
| |_____^
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them

error: labeled expression must be followed by `:`
--> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
|
LL | 'a🐱 loop {
| ^---- help: add `:` after the label
| |
| _____the label
| |
LL | |
LL | |
LL | | break
LL | | }
| |_____^
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them

error: labeled expression must be followed by `:`
--> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
|
LL | '1🐱 loop {
| ^---- help: add `:` after the label
| |
| _____the label
| |
LL | |
LL | |
LL | | break
LL | | }
| |_____^
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them

error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
|
LL | '🐱 loop {
| ^^^

error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
|
LL | 'a🐱 loop {
| ^^^^

error: lifetimes or labels cannot start with a number
--> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
|
LL | '1🐱 loop {
| ^^^^

error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:34:6
|
LL | fn x<'🐱>() -> &'🐱 () {
| ^^^

error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:34:16
|
LL | fn x<'🐱>() -> &'🐱 () {
| ^^^

error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:41:5
|
LL | 'a🐱: loop {}
| ^^^^

error: aborting due to 9 previous errors