Skip to content

Commit

Permalink
Auto merge of #123351 - beetrees:x86-ret-snan-rust, r=nikic,workingju…
Browse files Browse the repository at this point in the history
…bilee

Ensure floats are returned losslessly by the Rust ABI on 32-bit x86

Solves #115567 for the (default) `"Rust"` ABI. When compiling for 32-bit x86, this PR changes the `"Rust"` ABI to return floats indirectly instead of in x87 registers (with the exception of single `f32`s, which this PR returns in general purpose registers as they are small enough to fit in one). No change is made to the `"C"` ABI as that ABI requires x87 register usage and therefore will need a different solution.
  • Loading branch information
bors committed Jul 12, 2024
2 parents 62c068f + cae9d48 commit c6727fc
Show file tree
Hide file tree
Showing 8 changed files with 461 additions and 10 deletions.
34 changes: 34 additions & 0 deletions compiler/rustc_ty_utils/src/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,40 @@ fn fn_abi_adjust_for_abi<'tcx>(
return;
}

// Avoid returning floats in x87 registers on x86 as loading and storing from x87
// registers will quiet signalling NaNs.
if cx.tcx.sess.target.arch == "x86"
&& arg_idx.is_none()
// Intrinsics themselves are not actual "real" functions, so there's no need to
// change their ABIs.
&& abi != SpecAbi::RustIntrinsic
{
match arg.layout.abi {
// Handle similar to the way arguments with an `Abi::Aggregate` abi are handled
// below, by returning arguments up to the size of a pointer (32 bits on x86)
// cast to an appropriately sized integer.
Abi::Scalar(s) if s.primitive() == Float(F32) => {
// Same size as a pointer, return in a register.
arg.cast_to(Reg::i32());
return;
}
Abi::Scalar(s) if s.primitive() == Float(F64) => {
// Larger than a pointer, return indirectly.
arg.make_indirect();
return;
}
Abi::ScalarPair(s1, s2)
if matches!(s1.primitive(), Float(F32 | F64))
|| matches!(s2.primitive(), Float(F32 | F64)) =>
{
// Larger than a pointer, return indirectly.
arg.make_indirect();
return;
}
_ => {}
};
}

match arg.layout.abi {
Abi::Aggregate { .. } => {}

Expand Down
6 changes: 4 additions & 2 deletions src/doc/rustc/src/platform-support.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ target | notes
`x86_64-pc-windows-msvc` | 64-bit MSVC (Windows 10+, Windows Server 2016+)
`x86_64-unknown-linux-gnu` | 64-bit Linux (kernel 3.2+, glibc 2.17+)

[^x86_32-floats-return-ABI]: Due to limitations of the C ABI, floating-point support on `i686` targets is non-compliant: floating-point return values are passed via an x87 register, so NaN payload bits can be lost. See [issue #114479][x86-32-float-issue].
[^x86_32-floats-return-ABI]: Due to limitations of the C ABI, floating-point support on `i686` targets is non-compliant: floating-point return values are passed via an x87 register, so NaN payload bits can be lost. Functions with the default Rust ABI are not affected. See [issue #115567][x86-32-float-return-issue].

[77071]: https://github.com/rust-lang/rust/issues/77071
[x86-32-float-issue]: https://github.com/rust-lang/rust/issues/114479
[x86-32-float-return-issue]: https://github.com/rust-lang/rust/issues/115567

## Tier 1

Expand Down Expand Up @@ -209,6 +209,8 @@ target | std | notes

[^x86_32-floats-x87]: Floating-point support on `i586` targets is non-compliant: the `x87` registers and instructions used for these targets do not provide IEEE-754-compliant behavior, in particular when it comes to rounding and NaN payload bits. See [issue #114479][x86-32-float-issue].

[x86-32-float-issue]: https://github.com/rust-lang/rust/issues/114479

[wasi-rename]: https://github.com/rust-lang/compiler-team/issues/607

[Fortanix ABI]: https://edp.fortanix.com/
Expand Down
328 changes: 328 additions & 0 deletions tests/assembly/x86-return-float.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
//@ assembly-output: emit-asm
//@ only-x86
// FIXME(#114479): LLVM miscompiles loading and storing `f32` and `f64` when SSE is disabled.
// There's no compiletest directive to ignore a test on i586 only, so just always explicitly enable
// SSE2.
// Use the same target CPU as `i686` so that LLVM orders the instructions in the same order.
//@ compile-flags: -Ctarget-feature=+sse2 -Ctarget-cpu=pentium4
// Force frame pointers to make ASM more consistent between targets
//@ compile-flags: -O -C force-frame-pointers
//@ filecheck-flags: --implicit-check-not fld --implicit-check-not fst
//@ revisions: unix windows
//@[unix] ignore-windows
//@[windows] only-windows

#![crate_type = "lib"]
#![feature(f16, f128)]

// Tests that returning `f32` and `f64` with the "Rust" ABI on 32-bit x86 doesn't use the x87
// floating point stack, as loading and storing `f32`s and `f64`s to and from the x87 stack quietens
// signalling NaNs.

// Returning individual floats

// A single f32 is pointer-sized, so the Rust ABI returns it in a GPR (eax), not x87.
// CHECK-LABEL: return_f32:
#[no_mangle]
pub fn return_f32(x: f32) -> f32 {
// CHECK: movl {{.*}}(%ebp), %eax
// CHECK-NOT: ax
// CHECK: retl
x
}

// An f64 is larger than a pointer, so it is returned indirectly via an out-pointer.
// CHECK-LABEL: return_f64:
#[no_mangle]
pub fn return_f64(x: f64) -> f64 {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
// CHECK-NEXT: movsd %[[VAL]], (%[[PTR]])
// CHECK: retl
x
}

// Returning scalar pairs containing floats

// CHECK-LABEL: return_f32_f32:
#[no_mangle]
pub fn return_f32_f32(x: (f32, f32)) -> (f32, f32) {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movss [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movss %[[VAL2]], 4(%[[PTR]])
// CHECK: retl
x
}

// CHECK-LABEL: return_f64_f64:
#[no_mangle]
pub fn return_f64_f64(x: (f64, f64)) -> (f64, f64) {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movsd [[#%d,OFFSET+12]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
// CHECK: retl
x
}

// CHECK-LABEL: return_f32_f64:
#[no_mangle]
pub fn return_f32_f64(x: (f32, f64)) -> (f32, f64) {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movss [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
// The f64's store offset depends on the pair's layout (field order may differ).
// CHECK-NEXT: movsd %[[VAL2]], {{4|8}}(%[[PTR]])
// CHECK: retl
x
}

// CHECK-LABEL: return_f64_f32:
#[no_mangle]
pub fn return_f64_f32(x: (f64, f32)) -> (f64, f32) {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movss [[#%d,OFFSET+12]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movss %[[VAL2]], 8(%[[PTR]])
// CHECK: retl
x
}

// CHECK-LABEL: return_f32_other:
#[no_mangle]
pub fn return_f32_other(x: (f32, usize)) -> (f32, usize) {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movss [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movl %[[VAL2]], 4(%[[PTR]])
// CHECK: retl
x
}

// CHECK-LABEL: return_f64_other:
#[no_mangle]
pub fn return_f64_other(x: (f64, usize)) -> (f64, usize) {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movl %[[VAL2]], 8(%[[PTR]])
// CHECK: retl
x
}

// CHECK-LABEL: return_other_f32:
#[no_mangle]
pub fn return_other_f32(x: (usize, f32)) -> (usize, f32) {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movss %[[VAL2]], 4(%[[PTR]])
// CHECK: retl
x
}

// CHECK-LABEL: return_other_f64:
#[no_mangle]
pub fn return_other_f64(x: (usize, f64)) -> (usize, f64) {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
// The f64's store offset depends on the pair's layout (field order may differ).
// CHECK-NEXT: movsd %[[VAL2]], {{4|8}}(%[[PTR]])
// CHECK: retl
x
}

// Calling functions returning floats

// Caller side: a returned f32 arrives in eax and is stored as an integer (no x87 round-trip).
// CHECK-LABEL: call_f32:
#[no_mangle]
pub unsafe fn call_f32(x: &mut f32) {
extern "Rust" {
fn get_f32() -> f32;
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f32
// CHECK-NEXT: movl %eax, (%[[PTR]])
*x = get_f32();
}

// Caller side: a returned f64 is written to a caller-provided slot and copied with SSE moves.
// CHECK-LABEL: call_f64:
#[no_mangle]
pub unsafe fn call_f64(x: &mut f64) {
extern "Rust" {
fn get_f64() -> f64;
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f64
// CHECK: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
// CHECK-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
*x = get_f64();
}

// Calling functions returning scalar pairs containing floats

// CHECK-LABEL: call_f32_f32:
#[no_mangle]
pub unsafe fn call_f32_f32(x: &mut (f32, f32)) {
extern "Rust" {
fn get_f32_f32() -> (f32, f32);
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f32_f32
// CHECK: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movss [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movss %[[VAL2]], 4(%[[PTR]])
*x = get_f32_f32();
}

// unix vs windows revisions: the return slot is addressed off ebp on unix, esp on windows.
// CHECK-LABEL: call_f64_f64:
#[no_mangle]
pub unsafe fn call_f64_f64(x: &mut (f64, f64)) {
extern "Rust" {
fn get_f64_f64() -> (f64, f64);
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f64_f64
// unix: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// unix-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// windows: movsd (%esp), %[[VAL1:.*]]
// windows-NEXT: movsd 8(%esp), %[[VAL2:.*]]
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
*x = get_f64_f64();
}

// CHECK-LABEL: call_f32_f64:
#[no_mangle]
pub unsafe fn call_f32_f64(x: &mut (f32, f64)) {
extern "Rust" {
fn get_f32_f64() -> (f32, f64);
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f32_f64
// unix: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// unix-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
// windows: movss (%esp), %[[VAL1:.*]]
// windows-NEXT: movsd 8(%esp), %[[VAL2:.*]]
// CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
// unix-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
// windows-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
*x = get_f32_f64();
}

// CHECK-LABEL: call_f64_f32:
#[no_mangle]
pub unsafe fn call_f64_f32(x: &mut (f64, f32)) {
extern "Rust" {
fn get_f64_f32() -> (f64, f32);
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f64_f32
// unix: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// unix-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// windows: movsd (%esp), %[[VAL1:.*]]
// windows-NEXT: movss 8(%esp), %[[VAL2:.*]]
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movss %[[VAL2]], 8(%[[PTR]])
*x = get_f64_f32();
}

// CHECK-LABEL: call_f32_other:
#[no_mangle]
pub unsafe fn call_f32_other(x: &mut (f32, usize)) {
extern "Rust" {
fn get_f32_other() -> (f32, usize);
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f32_other
// CHECK: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movl %[[VAL2]], 4(%[[PTR]])
*x = get_f32_other();
}

// CHECK-LABEL: call_f64_other:
#[no_mangle]
pub unsafe fn call_f64_other(x: &mut (f64, usize)) {
extern "Rust" {
fn get_f64_other() -> (f64, usize);
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_f64_other
// unix: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// unix-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// windows: movsd (%esp), %[[VAL1:.*]]
// windows-NEXT: movl 8(%esp), %[[VAL2:.*]]
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movl %[[VAL2]], 8(%[[PTR]])
*x = get_f64_other();
}

// CHECK-LABEL: call_other_f32:
#[no_mangle]
pub unsafe fn call_other_f32(x: &mut (usize, f32)) {
extern "Rust" {
fn get_other_f32() -> (usize, f32);
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_other_f32
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movss [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
// CHECK-NEXT: movss %[[VAL2]], 4(%[[PTR]])
*x = get_other_f32();
}

// CHECK-LABEL: call_other_f64:
#[no_mangle]
pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
extern "Rust" {
fn get_other_f64() -> (usize, f64);
}
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
// CHECK: calll {{()|_}}get_other_f64
// unix: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
// unix-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
// windows: movl (%esp), %[[VAL1:.*]]
// windows-NEXT: movsd 8(%esp), %[[VAL2:.*]]
// CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
// unix-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
// windows-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
*x = get_other_f64();
}

// The "C" ABI for `f16` and `f128` on x86 has never used the x87 floating point stack. Do some
// basic checks to ensure this remains the case for the "Rust" ABI.

// f16 has always been returned in xmm0 (never via x87); make sure that stays true.
// CHECK-LABEL: return_f16:
#[no_mangle]
pub fn return_f16(x: f16) -> f16 {
// CHECK: pinsrw $0, {{.*}}(%ebp), %xmm0
// CHECK-NOT: xmm0
// CHECK: retl
x
}

// f128 is returned indirectly as four 32-bit GPR stores (never via x87).
// CHECK-LABEL: return_f128:
#[no_mangle]
pub fn return_f128(x: f128) -> f128 {
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
// CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
// CHECK-NEXT: movl %[[VAL4]], 12(%[[PTR]])
// CHECK-NEXT: movl %[[VAL3]], 8(%[[PTR]])
// CHECK-NEXT: movl %[[VAL2]], 4(%[[PTR]])
// CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
// CHECK: retl
x
}
Loading

0 comments on commit c6727fc

Please sign in to comment.