Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ext/ffi): Implement FFI fast-call trampoline with Dynasmrt #15305

Merged
Merged
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift click to select a range
8be9d6e
Replace tinycc-based JIT trampoline with js recv-as-first-arg "pattern"
arnauorriols Jul 21, 2022
4ebd951
add assertFastCall in test_ffi to ensure fast-API-call optimization
arnauorriols Jul 22, 2022
9a0b543
Implement JIT compilation of FFI fast-call trampoline with Dynasmrt
arnauorriols Jul 25, 2022
fed0fd2
Comment on rationale behind integer/sse argument classification in ff…
arnauorriols Jul 25, 2022
5ede98d
Merge branch 'main' into refactor/ffi-trampoline-plain-asm
arnauorriols Jul 26, 2022
a4182fa
Apply nitpick suggestions
arnauorriols Jul 26, 2022
f1abc06
Implement 8 and 16 bit integer casting to 32 bit in arguments and ret…
arnauorriols Jul 28, 2022
c34f764
Update test_util/std submodule
arnauorriols Jul 28, 2022
eb7b529
Implement Aarch64-apple-darwin and win64
arnauorriols Aug 10, 2022
c1b4341
Correct spelling errors
arnauorriols Aug 16, 2022
b018240
Cleanup and extend documentation
arnauorriols Aug 16, 2022
144442b
Implement stack-allocation unit tests for sysv and aarch64. Pending w…
arnauorriols Aug 16, 2022
93f43dc
Add integer-casting test for win64
arnauorriols Aug 16, 2022
a0d5201
Merge branch 'main' into refactor/ffi-trampoline-plain-asm
arnauorriols Aug 17, 2022
9ddf3b4
Remove prelude.h file
arnauorriols Aug 19, 2022
d40515e
Encapsulate optimized testing in helper function
arnauorriols Aug 19, 2022
5d372a6
Merge branch 'main' into refactor/ffi-trampoline-plain-asm
arnauorriols Aug 19, 2022
618d471
Extract alignment expression into helper function
arnauorriols Aug 20, 2022
1b9c192
Add various debug assertions to ensure memory-related preconditions
arnauorriols Aug 21, 2022
b6976b4
optimize loading of ffi pointer in aarch64-apple
arnauorriols Aug 22, 2022
90b4fe2
Rename `Integer::TypedArray` => `Integer::Pointer`
arnauorriols Aug 22, 2022
7ecad49
Remove leftover commented code
arnauorriols Aug 23, 2022
924b2fb
Use `padding_to_align` to calculate padding of stack parameters in aa…
arnauorriols Aug 24, 2022
f323ddf
add small helper macros to improve density of assembly lines
arnauorriols Aug 24, 2022
9728da9
move helper macros
arnauorriols Aug 24, 2022
65863c9
Merge branch 'main' into refactor/ffi-trampoline-plain-asm
arnauorriols Aug 30, 2022
ba0047f
Merge branch 'main' into refactor/ffi-trampoline-plain-asm
ry Sep 5, 2022
73f05c7
fix
ry Sep 5, 2022
a4e8b9f
Workaround Aarch64Apple calling convention to match V8's incorrect one
arnauorriols Sep 6, 2022
1680832
Cleanup temporary TODO comments
arnauorriols Sep 7, 2022
3b9f726
Improve clarifying comment and apply explicitness suggestion
arnauorriols Sep 7, 2022
d900b2b
format
littledivy Sep 7, 2022
7edffb1
Merge branch 'main' into refactor/ffi-trampoline-plain-asm
littledivy Sep 7, 2022
4000c7b
Merge branch 'main' into refactor/ffi-trampoline-plain-asm
littledivy Sep 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Encapsulate optimized testing in helper function
  • Loading branch information
arnauorriols committed Aug 19, 2022
commit d40515e8dd3f025963924eaeca1cfae7fbc10372
100 changes: 38 additions & 62 deletions test_ffi/tests/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,7 @@ function returnBuffer() { return return_buffer(); };
returnBuffer();
%OptimizeFunctionOnNextCall(returnBuffer);
const ptr0 = returnBuffer();

const status = %GetOptimizationStatus(returnBuffer);
if (!(status & (1 << 4))) {
throw new Error("returnBuffer is not optimized");
}
assertIsOptimized(returnBuffer);

dylib.symbols.print_buffer(ptr0, 8);
const ptrView = new Deno.UnsafePointerView(ptr0);
Expand Down Expand Up @@ -282,19 +278,10 @@ const { add_u32, add_usize_fast } = symbols;
function addU32Fast(a, b) {
return add_u32(a, b);
};

%PrepareFunctionForOptimization(addU32Fast);
console.log(addU32Fast(123, 456));
%OptimizeFunctionOnNextCall(addU32Fast);
console.log(addU32Fast(123, 456));
assertOptimized(addU32Fast);
testOptimized(addU32Fast, () => addU32Fast(123, 456));

function addU64Fast(a, b) { return add_usize_fast(a, b); };
%PrepareFunctionForOptimization(addU64Fast);
console.log(addU64Fast(2, 3));
%OptimizeFunctionOnNextCall(addU64Fast);
console.log(addU64Fast(2, 3));
assertOptimized(addU64Fast);
testOptimized(addU64Fast, () => addU64Fast(2, 3));

console.log(dylib.symbols.add_i32(123, 456));
console.log(dylib.symbols.add_u64(0xffffffffn, 0xffffffffn));
Expand All @@ -313,22 +300,12 @@ console.log(dylib.symbols.add_f64(123.123, 456.789));
function addF32Fast(a, b) {
return dylib.symbols.add_f32(a, b);
};

%PrepareFunctionForOptimization(addF32Fast);
console.log(addF32Fast(123.123, 456.789));
%OptimizeFunctionOnNextCall(addF32Fast);
console.log(addF32Fast(123.123, 456.789));
assertOptimized(addF32Fast);
testOptimized(addF32Fast, () => addF32Fast(123.123, 456.789));

function addF64Fast(a, b) {
return dylib.symbols.add_f64(a, b);
};

%PrepareFunctionForOptimization(addF64Fast);
console.log(addF64Fast(123.123, 456.789));
%OptimizeFunctionOnNextCall(addF64Fast);
console.log(addF64Fast(123.123, 456.789));
assertOptimized(addF64Fast);
testOptimized(addF64Fast, () => addF64Fast(123.123, 456.789));

// Test adders as nonblocking calls
console.log(await dylib.symbols.add_i32_nonblocking(123, 456));
Expand Down Expand Up @@ -476,47 +453,35 @@ dylib.symbols.call_stored_function_2(20);
function logManyParametersFast(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s) {
return symbols.log_many_parameters(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s);
};
testOptimized(
logManyParametersFast,
() => logManyParametersFast(
255, 65535, 4294967295, 4294967296, 123.456, 789.876, -1, -2, -3, -4, -1000, 1000,
12345.678910, 12345.678910, 12345.678910, 12345.678910, 12345.678910, 12345.678910, 12345.678910
)
);

%PrepareFunctionForOptimization(logManyParametersFast);
logManyParametersFast(255, 65535, 4294967295, 4294967296, 123.456, 789.876, -1, -2, -3, -4, -1000, 1000, 12345.678910, 12345.678910, 12345.678910, 12345.678910, 12345.678910, 12345.678910, 12345.678910);
%OptimizeFunctionOnNextCall(logManyParametersFast);
logManyParametersFast(255, 65535, 4294967295, 4294967296, 123.456, 789.876, -1, -2, -3, -4, -1000, 1000, 12345.678910, 12345.678910, 12345.678910, 12345.678910, 12345.678910, 12345.678910, 12345.678910);
assertOptimized(logManyParametersFast);

function castU8U32Fast(x) { return symbols.cast_u8_u32(x); };

%PrepareFunctionForOptimization(castU8U32Fast);
console.log(castU8U32Fast(256));
%OptimizeFunctionOnNextCall(castU8U32Fast);
// Some ABIs require the caller to zero/sign-extend narrow integer arguments, which lets the callee skip doing so itself.
// If the trampoline did not zero/sign-extend arguments, this would return 256 instead of the expected 0 (in optimized builds).
console.log(castU8U32Fast(256));
assertOptimized(castU8U32Fast);

function castU32U8Fast(x) { return symbols.cast_u32_u8(x); };
function castU8U32Fast(x) { return symbols.cast_u8_u32(x); };
testOptimized(castU8U32Fast, () => castU8U32Fast(256));

%PrepareFunctionForOptimization(castU32U8Fast);
console.log(castU32U8Fast(256));
%OptimizeFunctionOnNextCall(castU32U8Fast);
// Some ABIs allow the callee to leave garbage in the bits beyond the size of the return value, which lets the callee skip clearing them.
// If the trampoline did not zero/sign-extend the return value, this would return 256 instead of the expected 0 (in optimized builds).
console.log(castU32U8Fast(256));
assertOptimized(castU32U8Fast);
function castU32U8Fast(x) { return symbols.cast_u32_u8(x); };
testOptimized(castU32U8Fast, () => castU32U8Fast(256));

// Generally the trampoline tail-calls into the FFI function, but in certain cases (e.g. when returning 8 or 16 bit integers)
// the tail call is not possible and a new stack frame must be created. We need enough parameters to have some on the stack
function addManyU16Fast(a, b, c, d, e, f, g, h, i, j, k, l, m) {
return symbols.add_many_u16(a, b, c, d, e, f, g, h, i, j, k, l, m);
};
// TODO: this test currently fails on aarch64-apple-darwin (also on the main branch!). The reason is that V8 does not follow
// Apple's custom ABI properly (it aligns arguments to 8-byte boundaries instead of the natural alignment of the parameter type).
// A decision needs to be made:
// 1. Leave it broken and wait for V8 to fix the bug.
// 2. Adapt to the V8 bug and follow its ABI instead of Apple's. When V8 fixes the implementation, we'll have to fix it here as well.
function addManyU16Fast(a, b, c, d, e, f, g, h, i, j, k, l, m) { return symbols.add_many_u16(a, b, c, d, e, f, g, h, i, j, k, l, m); };

%PrepareFunctionForOptimization(addManyU16Fast);
console.log(addManyU16Fast(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12));
%OptimizeFunctionOnNextCall(addManyU16Fast);
// Generally the trampoline tail-calls into the FFI function, but in certain cases (e.g. when returning 8 or 16 bit integers)
// the tail call is not possible and a new stack frame must be created. We need enough parameters to have some on the stack
console.log(addManyU16Fast(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12));
assertOptimized(addManyU16Fast);
testOptimized(addManyU16Fast, () => addManyU16Fast(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12));


const nestedCallback = new Deno.UnsafeCallback(
Expand Down Expand Up @@ -588,10 +553,7 @@ try {
const bytes = new Uint8Array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
function hash() { return dylib.symbols.hash(bytes, bytes.byteLength); };

%PrepareFunctionForOptimization(hash);
console.log(hash());
%OptimizeFunctionOnNextCall(hash);
console.log(hash());
testOptimized(hash, () => hash());

(function cleanup() {
dylib.close();
Expand Down Expand Up @@ -619,7 +581,21 @@ After: ${postStr}`,
console.log("Correct number of resources");
})();

function assertOptimized(fn) {
function assertIsOptimized(fn) {
const status = % GetOptimizationStatus(fn);
assert(status & (1 << 4), `expected ${fn.name} to be optimized, but wasn't`);
}

function testOptimized(fn, callback) {
%PrepareFunctionForOptimization(fn);
const r1 = callback();
if (r1 !== undefined) {
console.log(r1);
}
%OptimizeFunctionOnNextCall(fn);
const r2 = callback();
if (r2 !== undefined) {
console.log(r2);
}
assertIsOptimized(fn);
}
arnauorriols marked this conversation as resolved.
Show resolved Hide resolved