Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add proper underscore handling to float and complex types. #5356

Merged
merged 10 commits into from
Jul 28, 2024
2 changes: 0 additions & 2 deletions Lib/test/test_complex.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,8 +576,6 @@ def test_constructor_negative_nans_from_string(self):
self.assertEqual(copysign(1., complex("-nan-nanj").real), -1.)
self.assertEqual(copysign(1., complex("-nan-nanj").imag), -1.)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_underscores(self):
# check underscores
for lit in VALID_UNDERSCORE_LITERALS:
Expand Down
2 changes: 0 additions & 2 deletions Lib/test/test_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,6 @@ def test_float(self):
def test_noargs(self):
self.assertEqual(float(), 0.0)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_underscores(self):
for lit in VALID_UNDERSCORE_LITERALS:
if not any(ch in lit for ch in 'jJxXoObB'):
Expand Down
14 changes: 10 additions & 4 deletions vm/src/builtins/complex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -545,13 +545,17 @@ fn parse_str(s: &str) -> Option<Complex64> {
};

let value = match s.strip_suffix(|c| c == 'j' || c == 'J') {
None => Complex64::new(crate::literal::float::parse_str(s)?, 0.0),
None => {
let stripped = float::float_strip_underscores(s.as_bytes())?;
Complex64::new(crate::literal::float::parse_bytes(&stripped)?, 0.0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any case where parse_bytes is called without float_strip_underscores?
How about putting it in parse_bytes, if it is necessary?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did not come across any cases where parse_bytes calls needed to or could be performed without underscore handling. I'll look into putting this directly in parse_bytes 👍

}
Some(mut s) => {
let mut real = 0.0;
// Find the central +/- operator. If it exists, parse the real part.
for (i, w) in s.as_bytes().windows(2).enumerate() {
if (w[1] == b'+' || w[1] == b'-') && !(w[0] == b'e' || w[0] == b'E') {
real = crate::literal::float::parse_str(&s[..=i])?;
let stripped = float::float_strip_underscores(s[..=i].as_bytes())?;
real = crate::literal::float::parse_bytes(&stripped)?;
s = &s[i + 1..];
break;
}
Expand All @@ -562,9 +566,11 @@ fn parse_str(s: &str) -> Option<Complex64> {
"" | "+" => 1.0,
// "-j"
"-" => -1.0,
s => crate::literal::float::parse_str(s)?,
s => {
let stripped = float::float_strip_underscores(s.as_bytes())?;
crate::literal::float::parse_bytes(&stripped)?
}
};

Complex64::new(real, imag)
}
};
Expand Down
45 changes: 40 additions & 5 deletions vm/src/builtins/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,33 @@ impl Constructor for PyFloat {
}
}

/// Removes digit-separating underscores from a numeric literal given as bytes.
///
/// Returns a copy of `b` with every `_` dropped, or `None` when an underscore
/// is misplaced. An underscore is accepted only when the byte immediately
/// before it and the byte immediately after it are both ASCII digits, so
/// leading, trailing, and doubled underscores are all rejected.
///
/// No other validation is performed: bytes other than `_` are copied through
/// unchanged, and an empty input yields an empty vector.
pub fn float_strip_underscores(b: &[u8]) -> Option<Vec<u8>> {
    let mut stripped = Vec::<u8>::new();
    // Byte seen on the previous iteration; NUL stands in for "nothing yet"
    // and is not a digit, so a leading underscore is rejected.
    let mut last = b'\0';

    for &byte in b {
        match byte {
            // A separator directly after a digit is dropped from the output.
            b'_' if last.is_ascii_digit() => {}
            // A separator after anything else (including another `_`) is invalid.
            b'_' => return None,
            // The byte following a separator must itself be a digit.
            _ if last == b'_' && !byte.is_ascii_digit() => return None,
            _ => stripped.push(byte),
        }
        last = byte;
    }

    // A trailing separator has no digit after it.
    (last != b'_').then_some(stripped)
}

fn float_from_string(val: PyObjectRef, vm: &VirtualMachine) -> PyResult<f64> {
let (bytearray, buffer, buffer_lock);
let b = if let Some(s) = val.payload_if_subclass::<PyStr>(vm) {
Expand All @@ -178,11 +205,19 @@ fn float_from_string(val: PyObjectRef, vm: &VirtualMachine) -> PyResult<f64> {
val.class().name()
)));
};
crate::literal::float::parse_bytes(b).ok_or_else(|| {
val.repr(vm)
.map(|repr| vm.new_value_error(format!("could not convert string to float: {repr}")))
.unwrap_or_else(|e| e)
})

let err = val
.repr(vm)
.map(|repr| vm.new_value_error(format!("could not convert string to float: {repr}")))
.unwrap_or_else(|e| e);

if !b.contains(&b'_') {
crate::literal::float::parse_bytes(b).ok_or(err)
} else if let Some(dup) = float_strip_underscores(b) {
crate::literal::float::parse_bytes(&dup).ok_or(err)
} else {
Err(err)
}
}

#[pyclass(
Expand Down
Loading