diff --git a/skynet/3rd/jemalloc/.travis.yml b/skynet/3rd/jemalloc/.travis.yml index 40b2eb5..2da5da8 100644 --- a/skynet/3rd/jemalloc/.travis.yml +++ b/skynet/3rd/jemalloc/.travis.yml @@ -23,6 +23,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -41,6 +43,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: osx + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -54,6 +58,8 @@ matrix: env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -74,6 +80,9 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: *gcc_multilib + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + addons: *gcc_multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: *gcc_multilib @@ -92,6 +101,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -104,6 +115,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof 
--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -114,6 +127,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -122,6 +137,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -130,6 +147,14 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux diff --git a/skynet/3rd/jemalloc/ChangeLog b/skynet/3rd/jemalloc/ChangeLog index 29a00fb..7c73a8f 100644 --- a/skynet/3rd/jemalloc/ChangeLog +++ b/skynet/3rd/jemalloc/ChangeLog @@ -4,7 +4,110 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 5.1.0 (May 4th, 2018) +* 5.2.0 (April 2, 2019) + + This release includes a few notable improvements, which are summarized below: + 1) improved fast-path performance from the optimizations by @djwatson; 2) + reduced virtual memory fragmentation and metadata usage; and 3) bug fixes on + setting the number of background threads. In addition, peak / spike memory + usage is improved with certain allocation patterns. As usual, the release and + prior dev versions have gone through large-scale production testing. 
+ + New features: + - Implement oversize_threshold, which uses a dedicated arena for allocations + crossing the specified threshold to reduce fragmentation. (@interwq) + - Add extents usage information to stats. (@tyleretzel) + - Log time information for sampled allocations. (@tyleretzel) + - Support 0 size in sdallocx. (@djwatson) + - Output rate for certain counters in malloc_stats. (@zinoale) + - Add configure option --enable-readlinkat, which allows the use of readlinkat + over readlink. (@davidtgoldblatt) + - Add configure options --{enable,disable}-{static,shared} to allow not + building unwanted libraries. (@Ericson2314) + - Add configure option --disable-libdl to enable fully static builds. + (@interwq) + - Add mallctl interfaces: + + opt.oversize_threshold (@interwq) + + stats.arenas..extent_avail (@tyleretzel) + + stats.arenas..extents..n{dirty,muzzy,retained} (@tyleretzel) + + stats.arenas..extents..{dirty,muzzy,retained}_bytes + (@tyleretzel) + + Portability improvements: + - Update MSVC builds. (@maksqwe, @rustyx) + - Workaround a compiler optimizer bug on s390x. (@rkmisra) + - Make use of pthread_set_name_np(3) on FreeBSD. (@trasz) + - Implement malloc_getcpu() to enable percpu_arena for windows. (@santagada) + - Link against -pthread instead of -lpthread. (@paravoid) + - Make background_thread not dependent on libdl. (@interwq) + - Add stringify to fix a linker directive issue on MSVC. (@daverigby) + - Detect and fall back when 8-bit atomics are unavailable. (@interwq) + - Fall back to the default pthread_create if dlsym(3) fails. (@interwq) + + Optimizations and refactors: + - Refactor the TSD module. (@davidtgoldblatt) + - Avoid taking extents_muzzy mutex when muzzy is disabled. (@interwq) + - Avoid taking large_mtx for auto arenas on the tcache flush path. (@interwq) + - Optimize ixalloc by avoiding a size lookup. (@interwq) + - Implement opt.oversize_threshold which uses a dedicated arena for requests + crossing the threshold, also eagerly purges the oversize extents. Default + the threshold to 8 MiB. (@interwq) + - Clean compilation with -Wextra. (@gnzlbg, @jasone) + - Refactor the size class module. (@davidtgoldblatt) + - Refactor the stats emitter. (@tyleretzel) + - Optimize pow2_ceil. (@rkmisra) + - Avoid runtime detection of lazy purging on FreeBSD. (@trasz) + - Optimize mmap(2) alignment handling on FreeBSD. (@trasz) + - Improve error handling for THP state initialization. (@jsteemann) + - Rework the malloc() fast path. (@djwatson) + - Rework the free() fast path. (@djwatson) + - Refactor and optimize the tcache fill / flush paths. (@djwatson) + - Optimize sync / lwsync on PowerPC. (@chmeeedalf) + - Bypass extent_dalloc() when retain is enabled. (@interwq) + - Optimize the locking on large deallocation. (@interwq) + - Reduce the number of pages committed from sanity checking in debug build. + (@trasz, @interwq) + - Deprecate OSSpinLock. (@interwq) + - Lower the default number of background threads to 4 (when the feature + is enabled). (@interwq) + - Optimize the trylock spin wait. (@djwatson) + - Use arena index for arena-matching checks. (@interwq) + - Avoid forced decay on thread termination when using background threads. + (@interwq) + - Disable muzzy decay by default. (@djwatson, @interwq) + - Only initialize libgcc unwinder when profiling is enabled. (@paravoid, + @interwq) + + Bug fixes (all only relevant to jemalloc 5.x): + - Fix background thread index issues with max_background_threads. 
(@djwatson, + @interwq) + - Fix stats output for opt.lg_extent_max_active_fit. (@interwq) + - Fix opt.prof_prefix initialization. (@davidtgoldblatt) + - Properly trigger decay on tcache destroy. (@interwq, @amosbird) + - Fix tcache.flush. (@interwq) + - Detect whether explicit extent zero out is necessary with huge pages or + custom extent hooks, which may change the purge semantics. (@interwq) + - Fix a side effect caused by extent_max_active_fit combined with decay-based + purging, where freed extents can accumulate and not be reused for an + extended period of time. (@interwq, @mpghf) + - Fix a missing unlock on extent register error handling. (@zoulasc) + + Testing: + - Simplify the Travis script output. (@gnzlbg) + - Update the test scripts for FreeBSD. (@devnexen) + - Add unit tests for the producer-consumer pattern. (@interwq) + - Add Cirrus-CI config for FreeBSD builds. (@jasone) + - Add size-matching sanity checks on tcache flush. (@davidtgoldblatt, + @interwq) + + Incompatible changes: + - Remove --with-lg-page-sizes. (@davidtgoldblatt) + + Documentation: + - Attempt to build docs by default, however skip doc building when xsltproc + is missing. (@interwq, @cmuellner) + +* 5.1.0 (May 4, 2018) This release is primarily about fine-tuning, ranging from several new features to numerous notable performance and portability enhancements. The release and diff --git a/skynet/3rd/jemalloc/Makefile.in b/skynet/3rd/jemalloc/Makefile.in index 482cee4..8ceb913 100644 --- a/skynet/3rd/jemalloc/Makefile.in +++ b/skynet/3rd/jemalloc/Makefile.in @@ -56,6 +56,7 @@ cfghdrs_out := @cfghdrs_out@ cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@) cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ +enable_doc := @enable_doc@ enable_shared := @enable_shared@ enable_static := @enable_static@ enable_prof := @enable_prof@ @@ -117,6 +118,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ $(srcroot)src/rtree.c \ + $(srcroot)src/safety_check.c \ $(srcroot)src/stats.c \ $(srcroot)src/sc.c \ $(srcroot)src/sz.c \ @@ -178,6 +180,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/div.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ + $(srcroot)test/unit/extent_util.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ @@ -208,6 +211,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/retained.c \ $(srcroot)test/unit/rtree.c \ + $(srcroot)test/unit/safety_check.c \ $(srcroot)test/unit/seq.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/sc.c \ @@ -513,7 +517,11 @@ done install_doc: build_doc install_doc_html install_doc_man -install: install_bin install_include install_lib install_doc +install: install_bin install_include install_lib + +ifeq ($(enable_doc), 1) +install: install_doc +endif tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) diff --git a/skynet/3rd/jemalloc/bin/jeprof.in b/skynet/3rd/jemalloc/bin/jeprof.in index 588c6b4..3ed408c 100644 --- a/skynet/3rd/jemalloc/bin/jeprof.in +++ b/skynet/3rd/jemalloc/bin/jeprof.in @@ -2909,6 +2909,7 @@ sub RemoveUninterestingFrames { '@JEMALLOC_PREFIX@xallocx', '@JEMALLOC_PREFIX@dallocx', '@JEMALLOC_PREFIX@sdallocx', + '@JEMALLOC_PREFIX@sdallocx_noflags', 'tc_calloc', 'tc_cfree', 'tc_malloc', @@ -5366,7 +5367,7 @@ sub GetProcedureBoundaries { my $demangle_flag = ""; my $cppfilt_flag = ""; my 
$to_devnull = ">$dev_null 2>&1"; - if (system(ShellEscape($nm, "--demangle", "image") . $to_devnull) == 0) { + if (system(ShellEscape($nm, "--demangle", $image) . $to_devnull) == 0) { # In this mode, we do "nm --demangle " $demangle_flag = "--demangle"; $cppfilt_flag = ""; diff --git a/skynet/3rd/jemalloc/configure.ac b/skynet/3rd/jemalloc/configure.ac index 96f76d3..39a540f 100644 --- a/skynet/3rd/jemalloc/configure.ac +++ b/skynet/3rd/jemalloc/configure.ac @@ -851,6 +851,18 @@ if test "x${je_cv_format_printf}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ]) fi +dnl Check for format_arg(...) attribute support. +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) +JE_COMPILABLE([format(printf, ...) attribute], [#include ], + [const char * __attribute__((__format_arg__(1))) foo(const char *format);], + [je_cv_format_arg]) +JE_CFLAGS_RESTORE() +if test "x${je_cv_format_arg}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_ARG], [ ]) +fi + dnl Support optional additions to rpath. AC_ARG_WITH([rpath], [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], @@ -881,6 +893,19 @@ AC_PROG_RANLIB AC_PATH_PROG([LD], [ld], [false], [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) +dnl Enable documentation +AC_ARG_ENABLE([doc], + [AS_HELP_STRING([--enable-documentation], [Build documentation])], +if test "x$enable_doc" = "xno" ; then + enable_doc="0" +else + enable_doc="1" +fi +, +enable_doc="1" +) +AC_SUBST([enable_doc]) + dnl Enable shared libs AC_ARG_ENABLE([shared], [AS_HELP_STRING([--enable-shared], [Build shared libaries])], @@ -1406,22 +1431,22 @@ if test "x$enable_readlinkat" = "x1" ; then fi AC_SUBST([enable_readlinkat]) -dnl Avoid the extra size checking by default -AC_ARG_ENABLE([extra-size-check], - [AS_HELP_STRING([--enable-extra-size-check], - [Perform additonal size related sanity checks])], -[if test "x$enable_extra_size_check" = "xno" ; then - enable_extra_size_check="0" +dnl Avoid extra safety checks by default +AC_ARG_ENABLE([opt-safety-checks], + [AS_HELP_STRING([--enable-opt-safety-checks], + [Perform certain low-overhead checks, even in opt mode])], +[if test "x$enable_opt_safety_checks" = "xno" ; then + enable_opt_safety_checks="0" else - enable_extra_size_check="1" + enable_opt_safety_checks="1" fi ], -[enable_extra_size_check="0"] +[enable_opt_safety_checks="0"] ) -if test "x$enable_extra_size_check" = "x1" ; then - AC_DEFINE([JEMALLOC_EXTRA_SIZE_CHECK], [ ]) +if test "x$enable_opt_safety_checks" = "x1" ; then + AC_DEFINE([JEMALLOC_OPT_SAFETY_CHECKS], [ ]) fi -AC_SUBST([enable_extra_size_check]) +AC_SUBST([enable_opt_safety_checks]) JE_COMPILABLE([a program using __builtin_unreachable], [ void foo (void) { @@ -2357,6 +2382,7 @@ AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}]) +AC_MSG_RESULT([documentation : ${enable_doc}]) AC_MSG_RESULT([shared libs : ${enable_shared}]) AC_MSG_RESULT([static libs : ${enable_static}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) diff --git a/skynet/3rd/jemalloc/doc/jemalloc.xml.in b/skynet/3rd/jemalloc/doc/jemalloc.xml.in index fe322e1..194f1ef 100644 --- a/skynet/3rd/jemalloc/doc/jemalloc.xml.in +++ b/skynet/3rd/jemalloc/doc/jemalloc.xml.in @@ -904,6 +904,23 @@ mallctl("arena." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", + + + opt.confirm_conf + (bool) + r- + + Confirm-runtime-options-when-program-starts + enabled/disabled. If true, the string specified via + , the string pointed to by the + global variable malloc_conf, the name + of the file referenced by the symbolic link named + /etc/malloc.conf, and the value of + the environment variable MALLOC_CONF, will be printed in + order. Then, each option being set will be individually printed. This + option is disabled by default. + + opt.abort_conf @@ -992,6 +1009,24 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", number of CPUs, or one if there is a single CPU. + + + opt.oversize_threshold + (size_t) + r- + + The threshold in bytes of which requests are considered + oversize. Allocation requests with greater sizes are fulfilled from a + dedicated arena (automatically managed, however not within + narenas), in order to reduce fragmentation by not + mixing huge allocations with small ones. In addition, the decay API + guarantees on the extents greater than the specified threshold may be + overridden. Note that requests with arena index specified via + MALLOCX_ARENA, or threads associated with explicit + arenas will not be considered. The default threshold is 8MiB. Values + not within large size classes disables this feature. + + opt.percpu_arena @@ -1013,7 +1048,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", opt.background_thread - (const bool) + (bool) r- Internal background worker threads enabled/disabled. @@ -1028,7 +1063,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", opt.max_background_threads - (const size_t) + (size_t) r- Maximum number of background threads that will be created @@ -1059,7 +1094,11 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", linkend="arena.i.dirty_decay_ms">arena.<i>.dirty_decay_ms for related dynamic control options. See opt.muzzy_decay_ms - for a description of muzzy pages. + for a description of muzzy pages.for a description of muzzy pages. Note + that when the oversize_threshold + feature is enabled, the arenas reserved for oversize requests may have + its own default decay settings. @@ -2776,6 +2815,28 @@ struct extent_hooks_s { all bin size classes. + + + stats.arenas.<i>.small.nfills + (uint64_t) + r- + [] + + Cumulative number of tcache fills by all small size + classes. + + + + + stats.arenas.<i>.small.nflushes + (uint64_t) + r- + [] + + Cumulative number of tcache flushes by all small size + classes. + + stats.arenas.<i>.large.allocated @@ -2826,6 +2887,28 @@ struct extent_hooks_s { all large size classes. + + + stats.arenas.<i>.large.nfills + (uint64_t) + r- + [] + + Cumulative number of tcache fills by all large size + classes. + + + + + stats.arenas.<i>.large.nflushes + (uint64_t) + r- + [] + + Cumulative number of tcache flushes by all large size + classes. + + stats.arenas.<i>.bins.<j>.nmalloc @@ -2925,6 +3008,17 @@ struct extent_hooks_s { Current number of slabs. + + + + stats.arenas.<i>.bins.<j>.nonfull_slabs + (size_t) + r- + [] + + Current number of nonfull slabs. 
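As a minimal sketch of how the new mallctl entries above might be read (assuming a default non-prefixed build with stats enabled; the arena index 0 and the printed labels are illustrative, not part of this change):

    #include <inttypes.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <jemalloc/jemalloc.h>

    int main(void) {
    	free(malloc(1));	/* Touch the allocator so arena 0 exists. */

    	/* Writing "epoch" refreshes the snapshot that stats.* reads return. */
    	uint64_t epoch = 1;
    	size_t sz = sizeof(epoch);
    	mallctl("epoch", &epoch, &sz, &epoch, sizeof(epoch));

    	/* opt.oversize_threshold is read-only; the default is 8 MiB. */
    	size_t thresh;
    	sz = sizeof(thresh);
    	if (mallctl("opt.oversize_threshold", &thresh, &sz, NULL, 0) == 0) {
    		printf("oversize_threshold: %zu bytes\n", thresh);
    	}

    	/* New 5.2.0 counters: per-arena small tcache fills and flushes. */
    	uint64_t nfills, nflushes;
    	sz = sizeof(uint64_t);
    	if (mallctl("stats.arenas.0.small.nfills", &nfills, &sz, NULL, 0) == 0 &&
    	    mallctl("stats.arenas.0.small.nflushes", &nflushes, &sz, NULL, 0) == 0) {
    		printf("arena 0 small: %" PRIu64 " fills, %" PRIu64 " flushes\n",
    		    nfills, nflushes);
    	}
    	return 0;
    }

The per-bin nonfull_slabs gauge reads the same way, e.g. via "stats.arenas.0.bins.0.nonfull_slabs" into a size_t.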
+ + stats.arenas.<i>.bins.<j>.mutex.{counter} diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/arena_externs.h b/skynet/3rd/jemalloc/include/jemalloc/internal/arena_externs.h index 2bdddb7..a4523ae 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/arena_externs.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/arena_externs.h @@ -60,7 +60,7 @@ void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); +void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/arena_inlines_b.h b/skynet/3rd/jemalloc/include/jemalloc/internal/arena_inlines_b.h index 614dedd..7e61a44 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/arena_inlines_b.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/arena_inlines_b.h @@ -90,7 +90,7 @@ arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, assert(ptr != NULL); extent_t *extent = iealloc(tsdn, ptr); - /* + /* * Unlike arena_prof_prof_tctx_{get, set}, we only call this once we're * sure we have a sampled allocation. */ diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/arena_stats.h b/skynet/3rd/jemalloc/include/jemalloc/internal/arena_stats.h index ef1e25b..3ffe9c7 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/arena_stats.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/arena_stats.h @@ -35,6 +35,13 @@ struct arena_stats_large_s { * periodically merges into this counter. */ arena_stats_u64_t nrequests; /* Partially derived. */ + /* + * Number of tcache fills / flushes for large (similarly, periodically + * merged). Note that there is no large tcache batch-fill currently + * (i.e. only fill 1 at a time); however flush may be batched. + */ + arena_stats_u64_t nfills; /* Partially derived. */ + arena_stats_u64_t nflushes; /* Partially derived. */ /* Current number of allocations of this size class. */ size_t curlextents; /* Derived. */ @@ -101,6 +108,8 @@ struct arena_stats_s { atomic_zu_t allocated_large; /* Derived. */ arena_stats_u64_t nmalloc_large; /* Derived. */ arena_stats_u64_t ndalloc_large; /* Derived. */ + arena_stats_u64_t nfills_large; /* Derived. */ + arena_stats_u64_t nflushes_large; /* Derived. */ arena_stats_u64_t nrequests_large; /* Derived. */ /* Number of bytes cached in tcache associated with this arena. 
*/ @@ -240,11 +249,12 @@ arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { } static inline void -arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, +arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests) { arena_stats_lock(tsdn, arena_stats); - arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - - SC_NBINS].nrequests, nrequests); + arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS]; + arena_stats_add_u64(tsdn, arena_stats, &lstats->nrequests, nrequests); + arena_stats_add_u64(tsdn, arena_stats, &lstats->nflushes, 1); arena_stats_unlock(tsdn, arena_stats); } diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/bin.h b/skynet/3rd/jemalloc/include/jemalloc/internal/bin.h index f542c88..8547e89 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/bin.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/bin.h @@ -116,6 +116,7 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) { dst_bin_stats->nslabs += bin->stats.nslabs; dst_bin_stats->reslabs += bin->stats.reslabs; dst_bin_stats->curslabs += bin->stats.curslabs; + dst_bin_stats->nonfull_slabs += bin->stats.nonfull_slabs; malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/bin_stats.h b/skynet/3rd/jemalloc/include/jemalloc/internal/bin_stats.h index 86e673e..d04519c 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/bin_stats.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/bin_stats.h @@ -45,6 +45,9 @@ struct bin_stats_s { /* Current number of slabs in this bin. */ size_t curslabs; + /* Current size of nonfull slabs heap in this bin. */ + size_t nonfull_slabs; + mutex_prof_data_t mutex_data; }; diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/ctl.h b/skynet/3rd/jemalloc/include/jemalloc/internal/ctl.h index 775fdec..1d1aacc 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/ctl.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/ctl.h @@ -39,6 +39,8 @@ typedef struct ctl_arena_stats_s { uint64_t nmalloc_small; uint64_t ndalloc_small; uint64_t nrequests_small; + uint64_t nfills_small; + uint64_t nflushes_small; bin_stats_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/emitter.h b/skynet/3rd/jemalloc/include/jemalloc/internal/emitter.h index 0a8bc2c..542bc79 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/emitter.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/emitter.h @@ -86,10 +86,11 @@ emitter_printf(emitter_t *emitter, const char *format, ...) { va_end(ap); } -static inline void +static inline const char * JEMALLOC_FORMAT_ARG(3) emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, emitter_justify_t justify, int width) { size_t written; + fmt_specifier++; if (justify == emitter_justify_none) { written = malloc_snprintf(out_fmt, out_size, "%%%s", fmt_specifier); @@ -102,6 +103,7 @@ emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, } /* Only happens in case of bad format string, which *we* choose. 
*/ assert(written < out_size); + return out_fmt; } /* @@ -127,26 +129,27 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, char buf[BUF_SIZE]; #define EMIT_SIMPLE(type, format) \ - emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width); \ - emitter_printf(emitter, fmt, *(const type *)value); \ + emitter_printf(emitter, \ + emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width), \ + *(const type *)value); switch (value_type) { case emitter_type_bool: - emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width); - emitter_printf(emitter, fmt, *(const bool *)value ? - "true" : "false"); + emitter_printf(emitter, + emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), + *(const bool *)value ? "true" : "false"); break; case emitter_type_int: - EMIT_SIMPLE(int, "d") + EMIT_SIMPLE(int, "%d") break; case emitter_type_unsigned: - EMIT_SIMPLE(unsigned, "u") + EMIT_SIMPLE(unsigned, "%u") break; case emitter_type_ssize: - EMIT_SIMPLE(ssize_t, "zd") + EMIT_SIMPLE(ssize_t, "%zd") break; case emitter_type_size: - EMIT_SIMPLE(size_t, "zu") + EMIT_SIMPLE(size_t, "%zu") break; case emitter_type_string: str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", @@ -156,17 +159,17 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, * anywhere near the fmt size. */ assert(str_written < BUF_SIZE); - emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width); - emitter_printf(emitter, fmt, buf); + emitter_printf(emitter, + emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf); break; case emitter_type_uint32: - EMIT_SIMPLE(uint32_t, FMTu32) + EMIT_SIMPLE(uint32_t, "%" FMTu32) break; case emitter_type_uint64: - EMIT_SIMPLE(uint64_t, FMTu64) + EMIT_SIMPLE(uint64_t, "%" FMTu64) break; case emitter_type_title: - EMIT_SIMPLE(char *const, "s"); + EMIT_SIMPLE(char *const, "%s"); break; default: unreachable(); diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/extent_externs.h b/skynet/3rd/jemalloc/include/jemalloc/internal/extent_externs.h index 8680251..8aba576 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/extent_externs.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/extent_externs.h @@ -24,7 +24,7 @@ size_t extent_size_quantize_floor(size_t size); size_t extent_size_quantize_ceil(size_t size); #endif -rb_proto(, extent_avail_, extent_tree_t, extent_t) +ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, @@ -74,4 +74,10 @@ bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, bool extent_boot(void); +void extent_util_stats_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size); +void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, + size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); + #endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */ diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/extent_structs.h b/skynet/3rd/jemalloc/include/jemalloc/internal/extent_structs.h index ceb1897..ad6710e 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/extent_structs.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/extent_structs.h @@ -228,4 +228,25 @@ struct extents_s { bool delay_coalesce; }; +/* + * The following two structs are for experimental purposes. See + * experimental_utilization_query_ctl and + * experimental_utilization_batch_query_ctl in src/ctl.c. 
+ */ + +struct extent_util_stats_s { + size_t nfree; + size_t nregs; + size_t size; +}; + +struct extent_util_stats_verbose_s { + void *slabcur_addr; + size_t nfree; + size_t nregs; + size_t size; + size_t bin_nfree; + size_t bin_nregs; +}; + #endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/extent_types.h b/skynet/3rd/jemalloc/include/jemalloc/internal/extent_types.h index acbcf27..865f8a1 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/extent_types.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/extent_types.h @@ -4,6 +4,9 @@ typedef struct extent_s extent_t; typedef struct extents_s extents_t; +typedef struct extent_util_stats_s extent_util_stats_t; +typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; + #define EXTENT_HOOKS_INITIALIZER NULL /* diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in b/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in index 21b6514..c442a21 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -360,7 +360,7 @@ */ #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE -/* Performs additional size-matching sanity checks when defined. */ -#undef JEMALLOC_EXTRA_SIZE_CHECK +/* Performs additional safety checks when defined. */ +#undef JEMALLOC_OPT_SAFETY_CHECKS #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h b/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h index b784362..d291170 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h @@ -10,6 +10,7 @@ extern bool malloc_slow; /* Run-time options. */ extern bool opt_abort; extern bool opt_abort_conf; +extern bool opt_confirm_conf; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; @@ -51,5 +52,6 @@ void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); bool malloc_initialized(void); +void je_sdallocx_noflags(void *ptr, size_t size); #endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in b/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in index 4bfdb32..3418cbf 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in @@ -161,6 +161,25 @@ static const bool config_log = false #endif ; +/* + * Are extra safety checks enabled; things like checking the size of sized + * deallocations, double-frees, etc. + */ +static const bool config_opt_safety_checks = +#ifdef JEMALLOC_OPT_SAFETY_CHECKS + true +#elif defined(JEMALLOC_DEBUG) + /* + * This lets us only guard safety checks by one flag instead of two; fast + * checks can guard solely by config_opt_safety_checks and run in debug mode + * too. + */ + true +#else + false +#endif + ; + #if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. 
*/ #define JEMALLOC_PERCPU_ARENA diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/prof_inlines_b.h b/skynet/3rd/jemalloc/include/jemalloc/internal/prof_inlines_b.h index 8358bff..8ba8a1e 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/prof_inlines_b.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/prof_inlines_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H #define JEMALLOC_INTERNAL_PROF_INLINES_B_H +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" JEMALLOC_ALWAYS_INLINE bool @@ -71,7 +72,7 @@ prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { JEMALLOC_ALWAYS_INLINE void prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, - nstime_t t) { + nstime_t t) { cassert(config_prof); assert(ptr != NULL); diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/safety_check.h b/skynet/3rd/jemalloc/include/jemalloc/internal/safety_check.h new file mode 100644 index 0000000..1b53fc4 --- /dev/null +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/safety_check.h @@ -0,0 +1,26 @@ +#ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H +#define JEMALLOC_INTERNAL_SAFETY_CHECK_H + +void safety_check_fail(const char *format, ...); +/* Can set to NULL for a default. */ +void safety_check_set_abort(void (*abort_fn)()); + +JEMALLOC_ALWAYS_INLINE void +safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { + assert(usize < bumped_usize); + for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { + *((unsigned char *)ptr + i) = 0xBC; + } +} + +JEMALLOC_ALWAYS_INLINE void +safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize) + { + for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { + if (unlikely(*((unsigned char *)ptr + i) != 0xBC)) { + safety_check_fail("Use after free error\n"); + } + } +} + +#endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */ diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/sc.h b/skynet/3rd/jemalloc/include/jemalloc/internal/sc.h index ef0a451..9a099d8 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/sc.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/sc.h @@ -18,7 +18,7 @@ * each one covers allocations for base / SC_NGROUP possible allocation sizes. * We call that value (base / SC_NGROUP) the delta of the group. Each size class * is delta larger than the one before it (including the initial size class in a - * group, which is delta large than 2**base, the largest size class in the + * group, which is delta larger than base, the largest size class in the * previous group). * To make the math all work out nicely, we require that SC_NGROUP is a power of * two, and define it in terms of SC_LG_NGROUP. We'll often talk in terms of @@ -53,10 +53,11 @@ * classes; one per power of two, up until we hit the quantum size. There are * therefore LG_QUANTUM - SC_LG_TINY_MIN such size classes. * - * Next, we have a size class of size LG_QUANTUM. This can't be the start of a - * group in the sense we described above (covering a power of two range) since, - * if we divided into it to pick a value of delta, we'd get a delta smaller than - * (1 << LG_QUANTUM) for sizes >= (1 << LG_QUANTUM), which is against the rules. + * Next, we have a size class of size (1 << LG_QUANTUM). 
This can't be the + * start of a group in the sense we described above (covering a power of two + * range) since, if we divided into it to pick a value of delta, we'd get a + * delta smaller than (1 << LG_QUANTUM) for sizes >= (1 << LG_QUANTUM), which + * is against the rules. * * The first base we can divide by SC_NGROUP while still being at least * (1 << LG_QUANTUM) is SC_NGROUP * (1 << LG_QUANTUM). We can get there by @@ -196,7 +197,7 @@ (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1) #define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR) - /* The number of size classes that are a multiple of the page size. */ +/* The number of size classes that are a multiple of the page size. */ #define SC_NPSIZES ( \ /* Start with all the size classes. */ \ SC_NSIZES \ @@ -206,8 +207,20 @@ - SC_NPSEUDO \ /* And the tiny group. */ \ - SC_NTINY \ - /* Groups where ndelta*delta is not a multiple of the page size. */ \ - - (2 * (SC_NGROUP))) + /* Sizes where ndelta*delta is not a multiple of the page size. */ \ + - (SC_LG_NGROUP * SC_NGROUP)) +/* + * Note that the last line is computed as the sum of the second column in the + * following table: + * lg(base) | count of sizes to exclude + * ------------------------------|----------------------------- + * LG_PAGE - 1 | SC_NGROUP - 1 + * LG_PAGE | SC_NGROUP - 1 + * LG_PAGE + 1 | SC_NGROUP - 2 + * LG_PAGE + 2 | SC_NGROUP - 4 + * ... | ... + * LG_PAGE + (SC_LG_NGROUP - 1) | SC_NGROUP - (SC_NGROUP / 2) + */ /* * We declare a size class is binnable if size < page size * group. Or, in other diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/skynet/3rd/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index bf8801e..65852d5 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -3,8 +3,10 @@ #endif #define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H -extern __thread tsd_t tsd_tls; -extern __thread bool tsd_initialized; +#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL + +extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; +extern JEMALLOC_TSD_TYPE_ATTR(bool) tsd_initialized; extern bool tsd_booted; /* Initialization/cleanup. */ diff --git a/skynet/3rd/jemalloc/include/jemalloc/internal/tsd_tls.h b/skynet/3rd/jemalloc/include/jemalloc/internal/tsd_tls.h index f4f165c..7d6c805 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/internal/tsd_tls.h +++ b/skynet/3rd/jemalloc/include/jemalloc/internal/tsd_tls.h @@ -3,7 +3,9 @@ #endif #define JEMALLOC_INTERNAL_TSD_TLS_H -extern __thread tsd_t tsd_tls; +#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL + +extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; extern pthread_key_t tsd_tsd; extern bool tsd_booted; diff --git a/skynet/3rd/jemalloc/include/jemalloc/jemalloc_defs.h.in b/skynet/3rd/jemalloc/include/jemalloc/jemalloc_defs.h.in index 6d89435..11c3918 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ b/skynet/3rd/jemalloc/include/jemalloc/jemalloc_defs.h.in @@ -4,6 +4,9 @@ /* Defined if alloc_size attribute is supported. */ #undef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +/* Defined if format_arg(...) attribute is supported. */ +#undef JEMALLOC_HAVE_ATTR_FORMAT_ARG + /* Defined if format(gnu_printf, ...) attribute is supported. 
*/ #undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF diff --git a/skynet/3rd/jemalloc/include/jemalloc/jemalloc_macros.h.in b/skynet/3rd/jemalloc/include/jemalloc/jemalloc_macros.h.in index a00ce11..59e2955 100644 --- a/skynet/3rd/jemalloc/include/jemalloc/jemalloc_macros.h.in +++ b/skynet/3rd/jemalloc/include/jemalloc/jemalloc_macros.h.in @@ -69,6 +69,7 @@ # define JEMALLOC_EXPORT __declspec(dllimport) # endif # endif +# define JEMALLOC_FORMAT_ARG(i) # define JEMALLOC_FORMAT_PRINTF(s, i) # define JEMALLOC_NOINLINE __declspec(noinline) # ifdef __cplusplus @@ -96,6 +97,11 @@ # ifndef JEMALLOC_EXPORT # define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) # endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG +# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) +# else +# define JEMALLOC_FORMAT_ARG(i) +# endif # ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF # define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) # elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) diff --git a/skynet/3rd/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/skynet/3rd/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index ddc6781..228e8be 100644 --- a/skynet/3rd/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/skynet/3rd/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -66,6 +66,7 @@ + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} @@ -346,4 +347,4 @@ - \ No newline at end of file + diff --git a/skynet/3rd/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/skynet/3rd/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 1dcf4ed..d839515 100644 --- a/skynet/3rd/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/skynet/3rd/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -100,5 +100,8 @@ Source Files + + Source Files + - \ No newline at end of file + diff --git a/skynet/3rd/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/skynet/3rd/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 21481d5..edcceed 100644 --- a/skynet/3rd/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/skynet/3rd/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -67,6 +67,7 @@ + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} @@ -346,4 +347,4 @@ - \ No newline at end of file + diff --git a/skynet/3rd/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/skynet/3rd/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 466dc63..6df7260 100644 --- a/skynet/3rd/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/skynet/3rd/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -103,5 +103,8 @@ Source Files + + Source Files + - \ No newline at end of file + diff --git a/skynet/3rd/jemalloc/scripts/gen_run_tests.py b/skynet/3rd/jemalloc/scripts/gen_run_tests.py index 5052b3e..a414f81 100644 --- a/skynet/3rd/jemalloc/scripts/gen_run_tests.py +++ b/skynet/3rd/jemalloc/scripts/gen_run_tests.py @@ -40,6 +40,7 @@ def powerset(items): '--enable-debug', '--enable-prof', '--disable-stats', + '--enable-opt-safety-checks', ] if bits_64: possible_config_opts.append('--with-lg-vaddr=56') diff --git a/skynet/3rd/jemalloc/scripts/gen_travis.py b/skynet/3rd/jemalloc/scripts/gen_travis.py index 65b0b67..f1478c6 100644 --- a/skynet/3rd/jemalloc/scripts/gen_travis.py +++ b/skynet/3rd/jemalloc/scripts/gen_travis.py @@ -46,6 +46,7 @@ '--enable-prof', '--disable-stats', '--disable-libdl', + '--enable-opt-safety-checks', ] malloc_conf_unusuals = [ diff --git 
a/skynet/3rd/jemalloc/src/arena.c b/skynet/3rd/jemalloc/src/arena.c index 60eac23..f9336fe 100644 --- a/skynet/3rd/jemalloc/src/arena.c +++ b/skynet/3rd/jemalloc/src/arena.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/util.h" JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS @@ -150,6 +151,15 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_u64(&astats->nrequests_large, nmalloc + nrequests); + /* nfill == nmalloc for large currently. */ + arena_stats_accum_u64(&lstats[i].nfills, nmalloc); + arena_stats_accum_u64(&astats->nfills_large, nmalloc); + + uint64_t nflush = arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.lstats[i].nflushes); + arena_stats_accum_u64(&lstats[i].nflushes, nflush); + arena_stats_accum_u64(&astats->nflushes_large, nflush); + assert(nmalloc >= ndalloc); assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); @@ -1001,11 +1011,17 @@ static void arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) > 0); extent_heap_insert(&bin->slabs_nonfull, slab); + if (config_stats) { + bin->stats.nonfull_slabs++; + } } static void arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) { extent_heap_remove(&bin->slabs_nonfull, slab); + if (config_stats) { + bin->stats.nonfull_slabs--; + } } static extent_t * @@ -1016,6 +1032,7 @@ arena_bin_slabs_nonfull_tryget(bin_t *bin) { } if (config_stats) { bin->stats.reslabs++; + bin->stats.nonfull_slabs--; } return slab; } @@ -1531,12 +1548,16 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } void -arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize) { +arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { cassert(config_prof); assert(ptr != NULL); assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); assert(usize <= SC_SMALL_MAXCLASS); + if (config_opt_safety_checks) { + safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); + } + rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1577,10 +1598,19 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(opt_prof); extent_t *extent = iealloc(tsdn, ptr); - size_t usize = arena_prof_demote(tsdn, extent, ptr); - if (usize <= tcache_maxclass) { + size_t usize = extent_usize_get(extent); + size_t bumped_usize = arena_prof_demote(tsdn, extent, ptr); + if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { + /* + * Currently, we only do redzoning for small sampled + * allocations. 
+ */ + assert(bumped_usize == SC_LARGE_MINCLASS); + safety_check_verify_redzone(ptr, usize, bumped_usize); + } + if (bumped_usize <= tcache_maxclass) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - sz_size2index(usize), slow_path); + sz_size2index(bumped_usize), slow_path); } else { large_dalloc(tsdn, extent); } diff --git a/skynet/3rd/jemalloc/src/background_thread.c b/skynet/3rd/jemalloc/src/background_thread.c index 5ed6c1c..57b9b25 100644 --- a/skynet/3rd/jemalloc/src/background_thread.c +++ b/skynet/3rd/jemalloc/src/background_thread.c @@ -799,7 +799,13 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { nstime_init(&stats->run_interval, 0); for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; - malloc_mutex_lock(tsdn, &info->mtx); + if (malloc_mutex_trylock(tsdn, &info->mtx)) { + /* + * Each background thread run may take a long time; + * avoid waiting on the stats if the thread is active. + */ + continue; + } if (info->state != background_thread_stopped) { num_runs += info->tot_n_runs; nstime_add(&stats->run_interval, &info->tot_sleep_time); diff --git a/skynet/3rd/jemalloc/src/ctl.c b/skynet/3rd/jemalloc/src/ctl.c index 09310a9..1d83087 100644 --- a/skynet/3rd/jemalloc/src/ctl.c +++ b/skynet/3rd/jemalloc/src/ctl.c @@ -72,6 +72,7 @@ CTL_PROTO(config_debug) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) CTL_PROTO(config_malloc_conf) +CTL_PROTO(config_opt_safety_checks) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) @@ -80,6 +81,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -155,10 +157,14 @@ CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) CTL_PROTO(stats_arenas_i_small_ndalloc) CTL_PROTO(stats_arenas_i_small_nrequests) +CTL_PROTO(stats_arenas_i_small_nfills) +CTL_PROTO(stats_arenas_i_small_nflushes) CTL_PROTO(stats_arenas_i_large_allocated) CTL_PROTO(stats_arenas_i_large_nmalloc) CTL_PROTO(stats_arenas_i_large_ndalloc) CTL_PROTO(stats_arenas_i_large_nrequests) +CTL_PROTO(stats_arenas_i_large_nfills) +CTL_PROTO(stats_arenas_i_large_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nmalloc) CTL_PROTO(stats_arenas_i_bins_j_ndalloc) CTL_PROTO(stats_arenas_i_bins_j_nrequests) @@ -168,6 +174,7 @@ CTL_PROTO(stats_arenas_i_bins_j_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) +CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) @@ -216,6 +223,10 @@ CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) +CTL_PROTO(experimental_utilization_query) +CTL_PROTO(experimental_utilization_batch_query) +CTL_PROTO(experimental_arenas_i_pactivep) +INDEX_PROTO(experimental_arenas_i) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -284,6 +295,7 @@ static const ctl_named_node_t config_node[] = { {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, {NAME("malloc_conf"), CTL(config_malloc_conf)}, + {NAME("opt_safety_checks"), CTL(config_opt_safety_checks)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), 
CTL(config_prof_libunwind)}, @@ -295,6 +307,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -409,14 +422,18 @@ static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} + {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_small_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_small_nflushes)} }; static const ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} + {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_large_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_large_nflushes)} }; #define MUTEX_PROF_DATA_NODE(prefix) \ @@ -450,6 +467,7 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, + {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)} }; @@ -572,13 +590,31 @@ static const ctl_named_node_t stats_node[] = { {NAME("arenas"), CHILD(indexed, stats_arenas)} }; -static const ctl_named_node_t hooks_node[] = { +static const ctl_named_node_t experimental_hooks_node[] = { {NAME("install"), CTL(experimental_hooks_install)}, - {NAME("remove"), CTL(experimental_hooks_remove)}, + {NAME("remove"), CTL(experimental_hooks_remove)} +}; + +static const ctl_named_node_t experimental_utilization_node[] = { + {NAME("query"), CTL(experimental_utilization_query)}, + {NAME("batch_query"), CTL(experimental_utilization_batch_query)} +}; + +static const ctl_named_node_t experimental_arenas_i_node[] = { + {NAME("pactivep"), CTL(experimental_arenas_i_pactivep)} +}; +static const ctl_named_node_t super_experimental_arenas_i_node[] = { + {NAME(""), CHILD(named, experimental_arenas_i)} +}; + +static const ctl_indexed_node_t experimental_arenas_node[] = { + {INDEX(experimental_arenas_i)} }; static const ctl_named_node_t experimental_node[] = { - {NAME("hooks"), CHILD(named, hooks)} + {NAME("hooks"), CHILD(named, experimental_hooks)}, + {NAME("utilization"), CHILD(named, experimental_utilization)}, + {NAME("arenas"), CHILD(indexed, experimental_arenas)} }; static const ctl_named_node_t root_node[] = { @@ -742,6 +778,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->astats->nmalloc_small = 0; ctl_arena->astats->ndalloc_small = 0; ctl_arena->astats->nrequests_small = 0; + ctl_arena->astats->nfills_small = 0; + ctl_arena->astats->nflushes_small = 0; memset(ctl_arena->astats->bstats, 0, SC_NBINS * sizeof(bin_stats_t)); memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * @@ -773,6 +811,10 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { ctl_arena->astats->bstats[i].ndalloc; 
ctl_arena->astats->nrequests_small += ctl_arena->astats->bstats[i].nrequests; + ctl_arena->astats->nfills_small += + ctl_arena->astats->bstats[i].nfills; + ctl_arena->astats->nflushes_small += + ctl_arena->astats->bstats[i].nflushes; } } else { arena_basic_stats_merge(tsdn, arena, &ctl_arena->nthreads, @@ -855,6 +897,8 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->nmalloc_small += astats->nmalloc_small; sdstats->ndalloc_small += astats->ndalloc_small; sdstats->nrequests_small += astats->nrequests_small; + sdstats->nfills_small += astats->nfills_small; + sdstats->nflushes_small += astats->nflushes_small; if (!destroyed) { accum_atomic_zu(&sdstats->astats.allocated_large, @@ -897,8 +941,11 @@ MUTEX_PROF_ARENA_MUTEXES if (!destroyed) { sdstats->bstats[i].curslabs += astats->bstats[i].curslabs; + sdstats->bstats[i].nonfull_slabs += + astats->bstats[i].nonfull_slabs; } else { assert(astats->bstats[i].curslabs == 0); + assert(astats->bstats[i].nonfull_slabs == 0); } malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); @@ -1698,6 +1745,7 @@ CTL_RO_CONFIG_GEN(config_debug, bool) CTL_RO_CONFIG_GEN(config_fill, bool) CTL_RO_CONFIG_GEN(config_lazy_lock, bool) CTL_RO_CONFIG_GEN(config_malloc_conf, const char *) +CTL_RO_CONFIG_GEN(config_opt_safety_checks, bool) CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) @@ -1709,6 +1757,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) @@ -2714,7 +2763,7 @@ static int prof_log_start_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - + const char *filename = NULL; if (!config_prof) { @@ -2726,7 +2775,7 @@ prof_log_start_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (prof_log_start(tsd_tsdn(tsd), filename)) { ret = EFAULT; - goto label_return; + goto label_return; } ret = 0; @@ -2831,6 +2880,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, arenas_i(mib[2])->astats->ndalloc_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, arenas_i(mib[2])->astats->nrequests_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nfills, + arenas_i(mib[2])->astats->nfills_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nflushes, + arenas_i(mib[2])->astats->nflushes_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, ATOMIC_RELAXED), size_t) @@ -2840,12 +2893,19 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t) /* - * Note: "nmalloc" here instead of "nrequests" in the read. This is intentional. + * Note: "nmalloc_large" here instead of "nfills" in the read. This is + * intentional (large has no batch fill). 
*/ -CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills, ctl_arena_stats_read_u64( - &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) /* Intentional. */ + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes, + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t) /* Lock profiling related APIs below. */ #define RO_MUTEX_CTL_GEN(n, l) \ @@ -2955,6 +3015,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs, arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, + arenas_i(mib[2])->astats->bstats[mib[4]].nonfull_slabs, size_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, @@ -3020,15 +3082,23 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } +static bool +ctl_arenas_i_verify(size_t i) { + size_t a = arenas_i2a_impl(i, true, true); + if (a == UINT_MAX || !ctl_arenas->arenas[a]->initialized) { + return true; + } + + return false; +} + static const ctl_named_node_t * stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; - size_t a; malloc_mutex_lock(tsdn, &ctl_mtx); - a = arenas_i2a_impl(i, true, true); - if (a == UINT_MAX || !ctl_arenas->arenas[a]->initialized) { + if (ctl_arenas_i_verify(i)) { ret = NULL; goto label_return; } @@ -3083,3 +3153,276 @@ experimental_hooks_remove_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, label_return: return ret; } + +/* + * Output six memory utilization entries for an input pointer, the first one of + * type (void *) and the remaining five of type size_t, describing the following + * (in the same order): + * + * (a) memory address of the extent a potential reallocation would go into, + * == the five fields below describe about the extent the pointer resides in == + * (b) number of free regions in the extent, + * (c) number of regions in the extent, + * (d) size of the extent in terms of bytes, + * (e) total number of free regions in the bin the extent belongs to, and + * (f) total number of regions in the bin the extent belongs to. + * + * Note that "(e)" and "(f)" are only available when stats are enabled; + * otherwise their values are undefined. + * + * This API is mainly intended for small class allocations, where extents are + * used as slab. + * + * In case of large class allocations, "(a)" will be NULL, and "(e)" and "(f)" + * will be zero (if stats are enabled; otherwise undefined). The other three + * fields will be properly set though the values are trivial: "(b)" will be 0, + * "(c)" will be 1, and "(d)" will be the usable size. + * + * The input pointer and size are respectively passed in by newp and newlen, + * and the output fields and size are respectively oldp and *oldlenp. 
+ *
+ * It can be beneficial to define the following macros to make it easier to
+ * access the output:
+ *
+ * #define SLABCUR_READ(out) (*(void **)out)
+ * #define COUNTS(out) ((size_t *)((void **)out + 1))
+ * #define NFREE_READ(out) COUNTS(out)[0]
+ * #define NREGS_READ(out) COUNTS(out)[1]
+ * #define SIZE_READ(out) COUNTS(out)[2]
+ * #define BIN_NFREE_READ(out) COUNTS(out)[3]
+ * #define BIN_NREGS_READ(out) COUNTS(out)[4]
+ *
+ * and then write e.g. NFREE_READ(oldp) to fetch the output. See the unit test
+ * test_query in test/unit/extent_util.c for an example.
+ *
+ * For a typical defragmentation workflow making use of this API for
+ * understanding the fragmentation level, please refer to the comment for
+ * experimental_utilization_batch_query_ctl.
+ *
+ * It is up to the application to determine the significance of fragmentation
+ * based on the outputs returned. Possible choices are:
+ *
+ * (a) if the extent utilization ratio is below a certain threshold,
+ * (b) if the extent memory consumption is above a certain threshold,
+ * (c) if the extent utilization ratio is significantly below the bin
+ *     utilization ratio,
+ * (d) if the input pointer deviates a lot from the potential reallocation
+ *     address, or
+ * (e) some selection/combination of the above.
+ *
+ * The caller needs to make sure that the input/output arguments are valid,
+ * in particular, that the size of the output is correct, i.e.:
+ *
+ *     *oldlenp = sizeof(void *) + sizeof(size_t) * 5
+ *
+ * Otherwise, the function immediately returns EINVAL without touching
+ * anything.
+ *
+ * In the rare case where there's no associated extent found for the input
+ * pointer, the function zeros out all output fields and returns. Please
+ * refer to the comment for experimental_utilization_batch_query_ctl to
+ * understand the motivation from C++.
+ */
+static int
+experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int ret;
+
+	assert(sizeof(extent_util_stats_verbose_t)
+	    == sizeof(void *) + sizeof(size_t) * 5);
+
+	if (oldp == NULL || oldlenp == NULL
+	    || *oldlenp != sizeof(extent_util_stats_verbose_t)
+	    || newp == NULL) {
+		ret = EINVAL;
+		goto label_return;
+	}
+
+	void *ptr = NULL;
+	WRITE(ptr, void *);
+	extent_util_stats_verbose_t *util_stats
+	    = (extent_util_stats_verbose_t *)oldp;
+	extent_util_stats_verbose_get(tsd_tsdn(tsd), ptr,
+	    &util_stats->nfree, &util_stats->nregs, &util_stats->size,
+	    &util_stats->bin_nfree, &util_stats->bin_nregs,
+	    &util_stats->slabcur_addr);
+	ret = 0;
+
+label_return:
+	return ret;
+}
+
+/*
+ * Given an input array of pointers, output three memory utilization entries
+ * of type size_t for each input pointer about the extent it resides in:
+ *
+ * (a) number of free regions in the extent,
+ * (b) number of regions in the extent, and
+ * (c) size of the extent in terms of bytes.
+ *
+ * This API is mainly intended for small class allocations, where extents are
+ * used as slabs. In case of large class allocations, the outputs are trivial:
+ * "(a)" will be 0, "(b)" will be 1, and "(c)" will be the usable size.
+ *
+ * Note that multiple input pointers may reside in the same extent, so the
+ * output fields may contain duplicates.
+ *
+ * The format of the input/output looks like:
+ *
+ * input[0]: 1st_pointer_to_query	| output[0]: 1st_extent_n_free_regions
+ *					| output[1]: 1st_extent_n_regions
+ *					| output[2]: 1st_extent_size
+ * input[1]: 2nd_pointer_to_query	| output[3]: 2nd_extent_n_free_regions
+ *					| output[4]: 2nd_extent_n_regions
+ *					| output[5]: 2nd_extent_size
+ * ...					| ...
+ *
+ * The input array and size are respectively passed in by newp and newlen, and
+ * the output array and size are respectively oldp and *oldlenp.
+ *
+ * It can be beneficial to define the following macros to make it easier to
+ * access the output:
+ *
+ * #define NFREE_READ(out, i) out[(i) * 3]
+ * #define NREGS_READ(out, i) out[(i) * 3 + 1]
+ * #define SIZE_READ(out, i) out[(i) * 3 + 2]
+ *
+ * and then write e.g. NFREE_READ(oldp, i) to fetch the output. See the unit
+ * test test_batch in test/unit/extent_util.c for a concrete example.
+ *
+ * A typical workflow would be composed of the following steps:
+ *
+ * (1) flush tcache: mallctl("thread.tcache.flush", ...)
+ * (2) initialize input array of pointers to query fragmentation
+ * (3) allocate output array to hold utilization statistics
+ * (4) query utilization: mallctl("experimental.utilization.batch_query", ...)
+ * (5) (optional) decide if it's worthwhile to defragment; otherwise stop here
+ * (6) disable tcache: mallctl("thread.tcache.enabled", ...)
+ * (7) defragment allocations with significant fragmentation, e.g.:
+ *     for each allocation {
+ *         if it's fragmented {
+ *             malloc(...);
+ *             memcpy(...);
+ *             free(...);
+ *         }
+ *     }
+ * (8) enable tcache: mallctl("thread.tcache.enabled", ...)
+ *
+ * The application can determine the significance of fragmentation itself,
+ * relying on the statistics returned, both at the overall level, i.e. step
+ * "(5)", and at the individual allocation level, i.e. within step "(7)".
+ * Possible choices are:
+ *
+ * (a) whether the memory utilization ratio is below a certain threshold,
+ * (b) whether the memory consumption is above a certain threshold, or
+ * (c) some combination of the two.
+ *
+ * The caller needs to make sure that the input/output arrays are valid and
+ * that their sizes are correct and mutually consistent, meaning:
+ *
+ * (a) newlen = n_pointers * sizeof(const void *)
+ * (b) *oldlenp = n_pointers * sizeof(size_t) * 3
+ * (c) n_pointers > 0
+ *
+ * Otherwise, the function immediately returns EINVAL without touching
+ * anything.
+ *
+ * In the rare case where there's no associated extent found for some pointers,
+ * rather than immediately terminating the computation and raising an error,
+ * the function simply zeros out the corresponding output fields and continues
+ * the computation until all input pointers are handled. The motivations for
+ * such a design are as follows:
+ *
+ * (a) The function always either processes nothing or processes everything,
+ * and never leaves the output half touched and half untouched.
+ *
+ * (b) It facilitates usage needs that are especially common in C++. A vast
+ * variety of C++ objects are instantiated with multiple dynamic memory
+ * allocations. For example, std::string and std::vector typically use at
+ * least two allocations, one for the metadata and one for the actual content.
+ * Other types may use even more allocations. When inquiring about utilization
+ * statistics, the caller often wants to examine all such allocations,
+ * especially internal one(s), rather than just the topmost one.
The issue comes when some + * implementations do certain optimizations to reduce/aggregate some internal + * allocations, e.g. putting short strings directly into the metadata, and such + * decisions are not known to the caller. Therefore, we permit pointers to + * memory usages that may not be returned by previous malloc calls, and we + * provide the caller a convenient way to identify such cases. + */ +static int +experimental_utilization_batch_query_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + assert(sizeof(extent_util_stats_t) == sizeof(size_t) * 3); + + const size_t len = newlen / sizeof(const void *); + if (oldp == NULL || oldlenp == NULL || newp == NULL || newlen == 0 + || newlen != len * sizeof(const void *) + || *oldlenp != len * sizeof(extent_util_stats_t)) { + ret = EINVAL; + goto label_return; + } + + void **ptrs = (void **)newp; + extent_util_stats_t *util_stats = (extent_util_stats_t *)oldp; + size_t i; + for (i = 0; i < len; ++i) { + extent_util_stats_get(tsd_tsdn(tsd), ptrs[i], + &util_stats[i].nfree, &util_stats[i].nregs, + &util_stats[i].size); + } + ret = 0; + +label_return: + return ret; +} + +static const ctl_named_node_t * +experimental_arenas_i_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t i) { + const ctl_named_node_t *ret; + + malloc_mutex_lock(tsdn, &ctl_mtx); + if (ctl_arenas_i_verify(i)) { + ret = NULL; + goto label_return; + } + ret = super_experimental_arenas_i_node; +label_return: + malloc_mutex_unlock(tsdn, &ctl_mtx); + return ret; +} + +static int +experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + if (!config_stats) { + return ENOENT; + } + if (oldp == NULL || oldlenp == NULL || *oldlenp != sizeof(size_t *)) { + return EINVAL; + } + + unsigned arena_ind; + arena_t *arena; + int ret; + size_t *pactivep; + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + READONLY(); + MIB_UNSIGNED(arena_ind, 2); + if (arena_ind < narenas_total_get() && (arena = + arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { +#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) || \ + defined(JEMALLOC_GCC_SYNC_ATOMICS) || defined(_MSC_VER) + /* Expose the underlying counter for fast read. */ + pactivep = (size_t *)&(arena->nactive.repr); + READ(pactivep, size_t *); + ret = 0; +#else + ret = EFAULT; +#endif + } else { + ret = EFAULT; + } +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} diff --git a/skynet/3rd/jemalloc/src/extent.c b/skynet/3rd/jemalloc/src/extent.c index 3396a9d..e83d9c8 100644 --- a/skynet/3rd/jemalloc/src/extent.c +++ b/skynet/3rd/jemalloc/src/extent.c @@ -441,30 +441,6 @@ extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, return NULL; } -/* Do any-best-fit extent selection, i.e. select any extent that best fits. */ -static extent_t * -extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - size_t size) { - pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); - pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, - (size_t)pind); - if (i < SC_NPSIZES + 1) { - /* - * In order to reduce fragmentation, avoid reusing and splitting - * large extents for much smaller sizes. 
- */ - if ((sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { - return NULL; - } - assert(!extent_heap_empty(&extents->heaps[i])); - extent_t *extent = extent_heap_first(&extents->heaps[i]); - assert(extent_size_get(extent) >= size); - return extent; - } - - return NULL; -} - /* * Do first-fit extent selection, i.e. select the oldest/lowest extent that is * large enough. @@ -483,7 +459,19 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, assert(!extent_heap_empty(&extents->heaps[i])); extent_t *extent = extent_heap_first(&extents->heaps[i]); assert(extent_size_get(extent) >= size); - if (ret == NULL || extent_snad_comp(extent, ret) < 0) { + bool size_ok = true; + /* + * In order to reduce fragmentation, avoid reusing and splitting + * large extents for much smaller sizes. + * + * Only do check for dirty extents (delay_coalesce). + */ + if (extents->delay_coalesce && + (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + size_ok = false; + } + if (size_ok && + (ret == NULL || extent_snad_comp(extent, ret) < 0)) { ret = extent; } if (i == SC_NPSIZES) { @@ -496,10 +484,8 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, } /* - * Do {best,first}-fit extent selection, where the selection policy choice is - * based on extents->delay_coalesce. Best-fit selection requires less - * searching, but its layout policy is less stable and may cause higher virtual - * memory fragmentation as a side effect. + * Do first-fit extent selection, where the selection policy choice is + * based on extents->delay_coalesce. */ static extent_t * extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, @@ -512,8 +498,7 @@ extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, return NULL; } - extent_t *extent = extents->delay_coalesce ? - extents_best_fit_locked(tsdn, arena, extents, max_size) : + extent_t *extent = extents_first_fit_locked(tsdn, arena, extents, max_size); if (alignment > PAGE && extent == NULL) { @@ -796,6 +781,7 @@ extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, false, true, &elm_a, &elm_b)) { + extent_unlock(tsdn, extent); return true; } @@ -1255,7 +1241,7 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, assert(arena != NULL); return extent_alloc_default_impl(tsdn, arena, new_addr, size, - alignment, zero, commit); + ALIGNMENT_CEILING(alignment, PAGE), zero, commit); } static void @@ -1492,14 +1478,15 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return NULL; } void *addr; + size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, - alignment, zero, commit); + palignment, zero, commit); } else { extent_hook_pre_reentrancy(tsdn, arena); addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, - esize, alignment, zero, commit, arena_ind_get(arena)); + esize, palignment, zero, commit, arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } if (addr == NULL) { @@ -2279,3 +2266,72 @@ extent_boot(void) { return false; } + +void +extent_util_stats_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = 0; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = 0; + *nregs = 1; + } else { + *nfree = extent_nfree_get(extent); + *nregs = bin_infos[extent_szind_get(extent)].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + } +} + +void +extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, + size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL + && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; + *slabcur_addr = NULL; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = *bin_nfree = *bin_nregs = 0; + *nregs = 1; + *slabcur_addr = NULL; + return; + } + + *nfree = extent_nfree_get(extent); + const szind_t szind = extent_szind_get(extent); + *nregs = bin_infos[szind].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + + const arena_t *arena = extent_arena_get(extent); + assert(arena != NULL); + const unsigned binshard = extent_binshard_get(extent); + bin_t *bin = &arena->bins[szind].bin_shards[binshard]; + + malloc_mutex_lock(tsdn, &bin->lock); + if (config_stats) { + *bin_nregs = *nregs * bin->stats.curslabs; + assert(*bin_nregs >= bin->stats.curregs); + *bin_nfree = *bin_nregs - bin->stats.curregs; + } else { + *bin_nfree = *bin_nregs = 0; + } + *slabcur_addr = extent_addr_get(bin->slabcur); + assert(*slabcur_addr != NULL); + malloc_mutex_unlock(tsdn, &bin->lock); +} diff --git a/skynet/3rd/jemalloc/src/extent_dss.c b/skynet/3rd/jemalloc/src/extent_dss.c index 6c56cf6..69a7bee 100644 --- a/skynet/3rd/jemalloc/src/extent_dss.c +++ b/skynet/3rd/jemalloc/src/extent_dss.c @@ -113,7 +113,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, cassert(have_dss); assert(size > 0); - assert(alignment > 0); + assert(alignment == ALIGNMENT_CEILING(alignment, PAGE)); /* * sbrk() uses a signed increment argument, so take care not to diff --git a/skynet/3rd/jemalloc/src/extent_mmap.c b/skynet/3rd/jemalloc/src/extent_mmap.c index 8d607dc..17fd1c8 100644 --- a/skynet/3rd/jemalloc/src/extent_mmap.c +++ b/skynet/3rd/jemalloc/src/extent_mmap.c @@ -21,8 +21,8 @@ bool opt_retain = void * extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret = pages_map(new_addr, size, ALIGNMENT_CEILING(alignment, - PAGE), commit); + assert(alignment == ALIGNMENT_CEILING(alignment, PAGE)); + void *ret = pages_map(new_addr, size, alignment, commit); if (ret == NULL) { 
return NULL; } diff --git a/skynet/3rd/jemalloc/src/jemalloc.c b/skynet/3rd/jemalloc/src/jemalloc.c index 855a98b..1e99a59 100644 --- a/skynet/3rd/jemalloc/src/jemalloc.c +++ b/skynet/3rd/jemalloc/src/jemalloc.c @@ -13,6 +13,7 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/sz.h" @@ -42,6 +43,8 @@ bool opt_abort_conf = false #endif ; +/* Intentionally default off, even with debug builds. */ +bool opt_confirm_conf = false; const char *opt_junk = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) "true" @@ -928,93 +931,140 @@ malloc_slow_flag_init(void) { malloc_slow = (malloc_slow_flags != 0); } -static void -malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { - unsigned i; - char buf[PATH_MAX + 1]; - const char *opts, *k, *v; - size_t klen, vlen; +/* Number of sources for initializing malloc_conf */ +#define MALLOC_CONF_NSOURCES 4 - for (i = 0; i < 4; i++) { - /* Get runtime configuration. */ - switch (i) { - case 0: - opts = config_malloc_conf; - break; - case 1: - if (je_malloc_conf != NULL) { - /* - * Use options that were compiled into the - * program. - */ - opts = je_malloc_conf; - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - case 2: { - ssize_t linklen = 0; +static const char * +obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { + if (config_debug) { + static unsigned read_source = 0; + /* + * Each source should only be read once, to minimize # of + * syscalls on init. + */ + assert(read_source++ == which_source); + } + assert(which_source < MALLOC_CONF_NSOURCES); + + const char *ret; + switch (which_source) { + case 0: + ret = config_malloc_conf; + break; + case 1: + if (je_malloc_conf != NULL) { + /* Use options that were compiled into the program. */ + ret = je_malloc_conf; + } else { + /* No configuration specified. */ + ret = NULL; + } + break; + case 2: { + ssize_t linklen = 0; #ifndef _WIN32 - int saved_errno = errno; - const char *linkname = + int saved_errno = errno; + const char *linkname = # ifdef JEMALLOC_PREFIX - "/etc/"JEMALLOC_PREFIX"malloc.conf" + "/etc/"JEMALLOC_PREFIX"malloc.conf" # else - "/etc/malloc.conf" + "/etc/malloc.conf" # endif - ; + ; - /* - * Try to use the contents of the "/etc/malloc.conf" - * symbolic link's name. - */ + /* + * Try to use the contents of the "/etc/malloc.conf" symbolic + * link's name. + */ #ifndef JEMALLOC_READLINKAT - linklen = readlink(linkname, buf, sizeof(buf) - 1); + linklen = readlink(linkname, buf, PATH_MAX); #else - linklen = readlinkat(AT_FDCWD, linkname, buf, - sizeof(buf) - 1); + linklen = readlinkat(AT_FDCWD, linkname, buf, PATH_MAX); #endif - if (linklen == -1) { - /* No configuration specified. */ - linklen = 0; - /* Restore errno. */ - set_errno(saved_errno); - } + if (linklen == -1) { + /* No configuration specified. */ + linklen = 0; + /* Restore errno. 
*/ + set_errno(saved_errno); + } #endif - buf[linklen] = '\0'; - opts = buf; - break; - } case 3: { - const char *envname = + buf[linklen] = '\0'; + ret = buf; + break; + } case 3: { + const char *envname = #ifdef JEMALLOC_PREFIX - JEMALLOC_CPREFIX"MALLOC_CONF" + JEMALLOC_CPREFIX"MALLOC_CONF" #else - "MALLOC_CONF" + "MALLOC_CONF" #endif - ; + ; - if ((opts = jemalloc_secure_getenv(envname)) != NULL) { - /* - * Do nothing; opts is already initialized to - * the value of the MALLOC_CONF environment - * variable. - */ - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - } default: - not_reached(); - buf[0] = '\0'; - opts = buf; + if ((ret = jemalloc_secure_getenv(envname)) != NULL) { + /* + * Do nothing; opts is already initialized to the value + * of the MALLOC_CONF environment variable. + */ + } else { + /* No configuration specified. */ + ret = NULL; + } + break; + } default: + not_reached(); + ret = NULL; + } + return ret; +} + +static void +malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], + char buf[PATH_MAX + 1]) { + static const char *opts_explain[MALLOC_CONF_NSOURCES] = { + "string specified via --with-malloc-conf", + "string pointed to by the global variable malloc_conf", + "\"name\" of the file referenced by the symbolic link named " + "/etc/malloc.conf", + "value of the environment variable MALLOC_CONF" + }; + unsigned i; + const char *opts, *k, *v; + size_t klen, vlen; + + for (i = 0; i < MALLOC_CONF_NSOURCES; i++) { + /* Get runtime configuration. */ + if (initial_call) { + opts_cache[i] = obtain_malloc_conf(i, buf); + } + opts = opts_cache[i]; + if (!initial_call && opt_confirm_conf) { + malloc_printf( + ": malloc_conf #%u (%s): \"%s\"\n", + i + 1, opts_explain[i], opts != NULL ? 
opts : ""); + } + if (opts == NULL) { + continue; } while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v, &vlen)) { + +#define CONF_ERROR(msg, k, klen, v, vlen) \ + if (!initial_call) { \ + malloc_conf_error( \ + msg, k, klen, v, vlen); \ + cur_opt_valid = false; \ + } +#define CONF_CONTINUE { \ + if (!initial_call && opt_confirm_conf \ + && cur_opt_valid) { \ + malloc_printf(": Set "\ + "conf value: %.*s:%.*s\n", \ + (int)klen, k, (int)vlen, v);\ + } \ + continue; \ + } #define CONF_MATCH(n) \ (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0) #define CONF_MATCH_VALUE(n) \ @@ -1026,11 +1076,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { } else if (CONF_MATCH_VALUE("false")) { \ o = false; \ } else { \ - malloc_conf_error( \ - "Invalid conf value", \ + CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } \ - continue; \ + CONF_CONTINUE; \ } /* * One of the CONF_MIN macros below expands, in one of the use points, @@ -1040,10 +1089,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS -#define CONF_MIN_no(um, min) false -#define CONF_MIN_yes(um, min) ((um) < (min)) -#define CONF_MAX_no(um, max) false -#define CONF_MAX_yes(um, max) ((um) > (max)) +#define CONF_DONT_CHECK_MIN(um, min) false +#define CONF_CHECK_MIN(um, min) ((um) < (min)) +#define CONF_DONT_CHECK_MAX(um, max) false +#define CONF_CHECK_MAX(um, max) ((um) > (max)) #define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ @@ -1053,26 +1102,21 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { um = malloc_strtoumax(v, &end, 0); \ if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ - malloc_conf_error( \ - "Invalid conf value", \ + CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } else if (clip) { \ - if (CONF_MIN_##check_min(um, \ - (t)(min))) { \ + if (check_min(um, (t)(min))) { \ o = (t)(min); \ } else if ( \ - CONF_MAX_##check_max(um, \ - (t)(max))) { \ + check_max(um, (t)(max))) { \ o = (t)(max); \ } else { \ o = (t)um; \ } \ } else { \ - if (CONF_MIN_##check_min(um, \ - (t)(min)) || \ - CONF_MAX_##check_max(um, \ - (t)(max))) { \ - malloc_conf_error( \ + if (check_min(um, (t)(min)) || \ + check_max(um, (t)(max))) { \ + CONF_ERROR( \ "Out-of-range " \ "conf value", \ k, klen, v, vlen); \ @@ -1080,7 +1124,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { o = (t)um; \ } \ } \ - continue; \ + CONF_CONTINUE; \ } #define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ clip) \ @@ -1098,18 +1142,17 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { l = strtol(v, &end, 0); \ if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ - malloc_conf_error( \ - "Invalid conf value", \ + CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } else if (l < (ssize_t)(min) || l > \ (ssize_t)(max)) { \ - malloc_conf_error( \ + CONF_ERROR( \ "Out-of-range conf value", \ k, klen, v, vlen); \ } else { \ o = l; \ } \ - continue; \ + CONF_CONTINUE; \ } #define CONF_HANDLE_CHAR_P(o, n, d) \ if (CONF_MATCH(n)) { \ @@ -1118,7 +1161,14 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { sizeof(o)-1; \ strncpy(o, v, cpylen); \ o[cpylen] = '\0'; \ - continue; \ + CONF_CONTINUE; \ + } + + bool cur_opt_valid = true; + + CONF_HANDLE_BOOL(opt_confirm_conf, "confirm_conf") + if (initial_call) { + continue; } 
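An aside on the option being wired up here: confirm_conf makes the second parsing pass echo each of the four configuration sources and every key:value pair it accepts, which is why the first pass above reads nothing but confirm_conf itself. A minimal way to exercise it from application code could look like the following (hypothetical demo, not part of this diff; assumes an unprefixed jemalloc build, where the application-visible global is named malloc_conf):

	/* confirm_conf_demo.c -- illustrative sketch only. */
	#include <stdlib.h>

	/* jemalloc consults this application-defined global during init. */
	const char *malloc_conf = "confirm_conf:true,narenas:2";

	int main(void) {
		/*
		 * The first allocation triggers jemalloc initialization,
		 * which then prints each conf source and every accepted
		 * key:value pair (e.g. "Set conf value: narenas:2").
		 */
		void *p = malloc(64);
		free(p);
		return 0;
	}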
CONF_HANDLE_BOOL(opt_abort, "abort") @@ -1135,10 +1185,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { } } if (!match) { - malloc_conf_error("Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, vlen); } - continue; + CONF_CONTINUE; } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { @@ -1148,7 +1198,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { if (extent_dss_prec_set(i)) { - malloc_conf_error( + CONF_ERROR( "Error setting dss", k, klen, v, vlen); } else { @@ -1160,13 +1210,14 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { } } if (!match) { - malloc_conf_error("Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, vlen); } - continue; + CONF_CONTINUE; } CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, - UINT_MAX, yes, no, false) + UINT_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, + false) if (CONF_MATCH("bin_shards")) { const char *bin_shards_segment_cur = v; size_t vlen_left = vlen; @@ -1180,14 +1231,14 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { if (err || bin_update_shard_size( bin_shard_sizes, size_start, size_end, nshards)) { - malloc_conf_error( + CONF_ERROR( "Invalid settings for " "bin_shards", k, klen, v, vlen); break; } } while (vlen_left > 0); - continue; + CONF_CONTINUE; } CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < @@ -1200,7 +1251,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (CONF_MATCH("stats_print_opts")) { init_opt_stats_print_opts(v, vlen); - continue; + CONF_CONTINUE; } if (config_fill) { if (CONF_MATCH("junk")) { @@ -1221,11 +1272,11 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { opt_junk_alloc = false; opt_junk_free = true; } else { - malloc_conf_error( - "Invalid conf value", k, - klen, v, vlen); + CONF_ERROR( + "Invalid conf value", + k, klen, v, vlen); } - continue; + CONF_CONTINUE; } CONF_HANDLE_BOOL(opt_zero, "zero") } @@ -1248,11 +1299,12 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { * contention on the huge arena. 
*/ CONF_HANDLE_SIZE_T(opt_oversize_threshold, - "oversize_threshold", SC_LARGE_MINCLASS, - SC_LARGE_MAXCLASS, yes, yes, false) + "oversize_threshold", 0, SC_LARGE_MAXCLASS, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, false) CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, "lg_extent_max_active_fit", 0, - (sizeof(size_t) << 3), yes, yes, false) + (sizeof(size_t) << 3), CONF_DONT_CHECK_MIN, + CONF_CHECK_MAX, false) if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; @@ -1261,7 +1313,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { if (strncmp(percpu_arena_mode_names[i], v, vlen) == 0) { if (!have_percpu_arena) { - malloc_conf_error( + CONF_ERROR( "No getcpu support", k, klen, v, vlen); } @@ -1271,16 +1323,17 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { } } if (!match) { - malloc_conf_error("Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, vlen); } - continue; + CONF_CONTINUE; } CONF_HANDLE_BOOL(opt_background_thread, "background_thread"); CONF_HANDLE_SIZE_T(opt_max_background_threads, "max_background_threads", 1, - opt_max_background_threads, yes, yes, + opt_max_background_threads, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { bool err; @@ -1299,13 +1352,12 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { sc_data, slab_start, slab_end, (int)pgs); } else { - malloc_conf_error( - "Invalid settings for " - "slab_sizes", k, klen, v, - vlen); + CONF_ERROR("Invalid settings " + "for slab_sizes", + k, klen, v, vlen); } } while (!err && vlen_left > 0); - continue; + CONF_CONTINUE; } if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") @@ -1316,7 +1368,8 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { "prof_thread_active_init") CONF_HANDLE_SIZE_T(opt_lg_prof_sample, "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - - 1, no, yes, true) + - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, "lg_prof_interval", -1, @@ -1333,7 +1386,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { vlen : sizeof(log_var_names) - 1); strncpy(log_var_names, v, cpylen); log_var_names[cpylen] = '\0'; - continue; + CONF_CONTINUE; } } if (CONF_MATCH("thp")) { @@ -1342,7 +1395,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { if (strncmp(thp_mode_names[i],v, vlen) == 0) { if (!have_madvise_huge) { - malloc_conf_error( + CONF_ERROR( "No THP support", k, klen, v, vlen); } @@ -1352,20 +1405,21 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { } } if (!match) { - malloc_conf_error("Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, vlen); } - continue; + CONF_CONTINUE; } - malloc_conf_error("Invalid conf pair", k, klen, v, - vlen); + CONF_ERROR("Invalid conf pair", k, klen, v, vlen); +#undef CONF_ERROR +#undef CONF_CONTINUE #undef CONF_MATCH #undef CONF_MATCH_VALUE #undef CONF_HANDLE_BOOL -#undef CONF_MIN_no -#undef CONF_MIN_yes -#undef CONF_MAX_no -#undef CONF_MAX_yes +#undef CONF_DONT_CHECK_MIN +#undef CONF_CHECK_MIN +#undef CONF_DONT_CHECK_MAX +#undef CONF_CHECK_MAX #undef CONF_HANDLE_T_U #undef CONF_HANDLE_UNSIGNED #undef CONF_HANDLE_SIZE_T @@ -1381,6 +1435,19 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); } +static void +malloc_conf_init(sc_data_t *sc_data, unsigned 
bin_shard_sizes[SC_NBINS]) { + const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL}; + char buf[PATH_MAX + 1]; + + /* The first call only set the confirm_conf option and opts_cache */ + malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf); + malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, + NULL); +} + +#undef MALLOC_CONF_NSOURCES + static bool malloc_init_hard_needed(void) { if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state == @@ -2732,7 +2799,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { tcache_t *tcache = tsd_tcachep_get(tsd); alloc_ctx_t alloc_ctx; - /* + /* * If !config_cache_oblivious, we can check PAGE alignment to * detect sampled objects. Otherwise addresses are * randomized, and we have to look it up in the rtree anyway. @@ -2743,12 +2810,12 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != SC_NSIZES); /* Note: profiled objects will have alloc_ctx.slab set */ if (!res || !alloc_ctx.slab) { return false; } + assert(alloc_ctx.szind != SC_NSIZES); } else { /* * Check for both sizes that are too large, and for sampled objects. @@ -3522,6 +3589,18 @@ je_sdallocx(void *ptr, size_t size, int flags) { LOG("core.sdallocx.exit", ""); } +void JEMALLOC_NOTHROW +je_sdallocx_noflags(void *ptr, size_t size) { + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, + size); + + if (!free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, 0); + } + + LOG("core.sdallocx.exit", ""); +} + JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { diff --git a/skynet/3rd/jemalloc/src/jemalloc_cpp.cpp b/skynet/3rd/jemalloc/src/jemalloc_cpp.cpp index f0cedda..da0441a 100644 --- a/skynet/3rd/jemalloc/src/jemalloc_cpp.cpp +++ b/skynet/3rd/jemalloc/src/jemalloc_cpp.cpp @@ -128,14 +128,14 @@ operator delete(void *ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } - je_sdallocx(ptr, size, /*flags=*/0); + je_sdallocx_noflags(ptr, size); } void operator delete[](void *ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } - je_sdallocx(ptr, size, /*flags=*/0); + je_sdallocx_noflags(ptr, size); } #endif // __cpp_sized_deallocation diff --git a/skynet/3rd/jemalloc/src/malloc_io.c b/skynet/3rd/jemalloc/src/malloc_io.c index 7bdc13f..dd88265 100644 --- a/skynet/3rd/jemalloc/src/malloc_io.c +++ b/skynet/3rd/jemalloc/src/malloc_io.c @@ -362,7 +362,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { } \ } while (0) #define GET_ARG_NUMERIC(val, len) do { \ - switch (len) { \ + switch ((unsigned char)len) { \ case '?': \ val = va_arg(ap, int); \ break; \ diff --git a/skynet/3rd/jemalloc/src/prof.c b/skynet/3rd/jemalloc/src/prof.c index 296de52..a4e30f4 100644 --- a/skynet/3rd/jemalloc/src/prof.c +++ b/skynet/3rd/jemalloc/src/prof.c @@ -125,7 +125,7 @@ struct prof_thr_node_s { uint64_t thr_uid; /* Variable size based on thr_name_sz. 
*/ char name[1]; -}; +}; typedef struct prof_alloc_node_s prof_alloc_node_t; @@ -376,7 +376,8 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { size_t sz = offsetof(prof_bt_node_t, vec) + (bt->len * sizeof(void *)); prof_bt_node_t *new_node = (prof_bt_node_t *) - ialloc(tsd, sz, sz_size2index(sz), false, true); + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); if (log_bt_first == NULL) { log_bt_first = new_node; log_bt_last = new_node; @@ -387,7 +388,7 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { new_node->next = NULL; new_node->index = log_bt_index; - /* + /* * Copy the backtrace: bt is inside a tdata or gctx, which * might die before prof_log_stop is called. */ @@ -401,7 +402,7 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { } else { return node->index; } -} +} static size_t prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { assert(prof_logging_state == prof_logging_state_started); @@ -416,7 +417,8 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { (void **)(&node), NULL)) { size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; prof_thr_node_t *new_node = (prof_thr_node_t *) - ialloc(tsd, sz, sz_size2index(sz), false, true); + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); if (log_thr_first == NULL) { log_thr_first = new_node; log_thr_last = new_node; @@ -450,7 +452,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { * it's being destroyed). */ return; - } + } malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); @@ -474,10 +476,11 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { nstime_t free_time = NSTIME_ZERO_INITIALIZER; nstime_update(&free_time); + size_t sz = sizeof(prof_alloc_node_t); prof_alloc_node_t *new_node = (prof_alloc_node_t *) - ialloc(tsd, sizeof(prof_alloc_node_t), - sz_size2index(sizeof(prof_alloc_node_t)), false, true); - + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? "" : tctx->tdata->thread_name; const char *cons_thr_name = prof_thread_name_get(tsd); @@ -511,11 +514,11 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { } label_done: - malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); } void -prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, +prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); @@ -2601,8 +2604,8 @@ static void prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { emitter_json_array_kv_begin(emitter, "stack_traces"); prof_bt_node_t *bt_node = log_bt_first; - prof_bt_node_t *bt_old_node; - /* + prof_bt_node_t *bt_old_node; + /* * Calculate how many hex digits we need: twice number of bytes, two for * "0x", and then one more for terminating '\0'. 
*/ diff --git a/skynet/3rd/jemalloc/src/safety_check.c b/skynet/3rd/jemalloc/src/safety_check.c new file mode 100644 index 0000000..804155d --- /dev/null +++ b/skynet/3rd/jemalloc/src/safety_check.c @@ -0,0 +1,24 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +static void (*safety_check_abort)(const char *message); + +void safety_check_set_abort(void (*abort_fn)(const char *)) { + safety_check_abort = abort_fn; +} + +void safety_check_fail(const char *format, ...) { + char buf[MALLOC_PRINTF_BUFSIZE]; + + va_list ap; + va_start(ap, format); + malloc_vsnprintf(buf, MALLOC_PRINTF_BUFSIZE, format, ap); + va_end(ap); + + if (safety_check_abort == NULL) { + malloc_write(buf); + abort(); + } else { + safety_check_abort(buf); + } +} diff --git a/skynet/3rd/jemalloc/src/stats.c b/skynet/3rd/jemalloc/src/stats.c index 4c427e0..bce9f45 100644 --- a/skynet/3rd/jemalloc/src/stats.c +++ b/skynet/3rd/jemalloc/src/stats.c @@ -294,6 +294,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti COL_HDR(row, nshards, NULL, right, 9, unsigned) COL_HDR(row, curregs, NULL, right, 13, size) COL_HDR(row, curslabs, NULL, right, 13, size) + COL_HDR(row, nonfull_slabs, NULL, right, 15, size) COL_HDR(row, regs, NULL, right, 5, unsigned) COL_HDR(row, pgs, NULL, right, 4, size) /* To buffer a right- and left-justified column. */ @@ -337,6 +338,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti uint64_t nslabs; size_t reg_size, slab_size, curregs; size_t curslabs; + size_t nonfull_slabs; uint32_t nregs, nshards; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; @@ -372,6 +374,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti uint64_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nonfull_slabs", i, j, &nonfull_slabs, + size_t); if (mutex) { mutex_stats_read_arena_bin(i, j, col_mutex64, @@ -395,6 +399,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti &nreslabs); emitter_json_kv(emitter, "curslabs", emitter_type_size, &curslabs); + emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size, + &nonfull_slabs); if (mutex) { emitter_json_object_kv_begin(emitter, "mutex"); mutex_stats_emit(emitter, NULL, col_mutex64, @@ -434,6 +440,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti col_nshards.unsigned_val = nshards; col_curregs.size_val = curregs; col_curslabs.size_val = curslabs; + col_nonfull_slabs.size_val = nonfull_slabs; col_regs.unsigned_val = nregs; col_pgs.size_val = slab_size / page; col_util.str_val = util; @@ -661,9 +668,11 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; - uint64_t small_nmalloc, small_ndalloc, small_nrequests; + uint64_t small_nmalloc, small_ndalloc, small_nrequests, small_nfills, + small_nflushes; size_t large_allocated; - uint64_t large_nmalloc, large_ndalloc, large_nrequests; + uint64_t large_nmalloc, large_ndalloc, large_nrequests, large_nfills, + large_nflushes; size_t tcache_bytes; uint64_t uptime; @@ -821,11 +830,23 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, COL(alloc_count_row, count_nrequests_ps, right, 10, title); col_count_nrequests_ps.str_val = "(#/sec)"; + 
COL(alloc_count_row, count_nfills, right, 16, title); + col_count_nfills.str_val = "nfill"; + COL(alloc_count_row, count_nfills_ps, right, 10, title); + col_count_nfills_ps.str_val = "(#/sec)"; + + COL(alloc_count_row, count_nflushes, right, 16, title); + col_count_nflushes.str_val = "nflush"; + COL(alloc_count_row, count_nflushes_ps, right, 10, title); + col_count_nflushes_ps.str_val = "(#/sec)"; + emitter_table_row(emitter, &alloc_count_row); col_count_nmalloc_ps.type = emitter_type_uint64; col_count_ndalloc_ps.type = emitter_type_uint64; col_count_nrequests_ps.type = emitter_type_uint64; + col_count_nfills_ps.type = emitter_type_uint64; + col_count_nflushes_ps.type = emitter_type_uint64; #define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i, \ @@ -848,6 +869,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64) col_count_nrequests_ps.uint64_val = rate_per_second(col_count_nrequests.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(small, nfills, uint64) + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(small, nflushes, uint64) + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "small". */ @@ -865,6 +892,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64) col_count_nrequests_ps.uint64_val = rate_per_second(col_count_nrequests.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(large, nfills, uint64) + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(large, nflushes, uint64) + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "large". 
*/ @@ -877,12 +910,18 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, col_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc; col_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc; col_count_nrequests.uint64_val = small_nrequests + large_nrequests; + col_count_nfills.uint64_val = small_nfills + large_nfills; + col_count_nflushes.uint64_val = small_nflushes + large_nflushes; col_count_nmalloc_ps.uint64_val = rate_per_second(col_count_nmalloc.uint64_val, uptime); col_count_ndalloc_ps.uint64_val = rate_per_second(col_count_ndalloc.uint64_val, uptime); col_count_nrequests_ps.uint64_val = rate_per_second(col_count_nrequests.uint64_val, uptime); + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_row_t mem_count_row; @@ -976,6 +1015,7 @@ stats_general_print(emitter_t *emitter) { emitter_kv(emitter, "malloc_conf", "config.malloc_conf", emitter_type_string, &config_malloc_conf); + CONFIG_WRITE_BOOL(opt_safety_checks); CONFIG_WRITE_BOOL(prof); CONFIG_WRITE_BOOL(prof_libgcc); CONFIG_WRITE_BOOL(prof_libunwind); @@ -1025,6 +1065,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("abort") OPT_WRITE_BOOL("abort_conf") + OPT_WRITE_BOOL("confirm_conf") OPT_WRITE_BOOL("retain") OPT_WRITE_CHAR_P("dss") OPT_WRITE_UNSIGNED("narenas") diff --git a/skynet/3rd/jemalloc/src/tcache.c b/skynet/3rd/jemalloc/src/tcache.c index be4fb87..50099a9 100644 --- a/skynet/3rd/jemalloc/src/tcache.c +++ b/skynet/3rd/jemalloc/src/tcache.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" /******************************************************************************/ @@ -101,7 +102,6 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } /* Enabled with --enable-extra-size-check. */ -#ifdef JEMALLOC_EXTRA_SIZE_CHECK static void tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, size_t nflush, extent_t **extents){ @@ -123,10 +123,12 @@ tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, sz_sum -= szind; } if (sz_sum != 0) { + safety_check_fail(": size mismatch in thread cache " + "detected, likely caused by sized deallocation bugs by " + "application. Abort.\n"); abort(); } } -#endif void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, @@ -141,15 +143,16 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, unsigned nflush = tbin->ncached - rem; VARIABLE_ARRAY(extent_t *, item_extent, nflush); -#ifndef JEMALLOC_EXTRA_SIZE_CHECK /* Look up extent once per item. */ - for (unsigned i = 0 ; i < nflush; i++) { - item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); + if (config_opt_safety_checks) { + tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, + nflush, item_extent); + } else { + for (unsigned i = 0 ; i < nflush; i++) { + item_extent[i] = iealloc(tsd_tsdn(tsd), + *(tbin->avail - 1 - i)); + } } -#else - tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, - item_extent); -#endif while (nflush > 0) { /* Lock the arena bin associated with the first object. 
*/ extent_t *extent = item_extent[0]; @@ -279,8 +282,8 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, } if (config_stats) { merged_stats = true; - arena_stats_large_nrequests_add(tsd_tsdn(tsd), - &tcache_arena->stats, binind, + arena_stats_large_flush_nrequests_add( + tsd_tsdn(tsd), &tcache_arena->stats, binind, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } @@ -321,7 +324,7 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_stats_large_nrequests_add(tsd_tsdn(tsd), + arena_stats_large_flush_nrequests_add(tsd_tsdn(tsd), &tcache_arena->stats, binind, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } @@ -612,7 +615,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { for (; i < nhbins; i++) { cache_bin_t *tbin = tcache_large_bin_get(tcache, i); - arena_stats_large_nrequests_add(tsdn, &arena->stats, i, + arena_stats_large_flush_nrequests_add(tsdn, &arena->stats, i, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } diff --git a/skynet/3rd/jemalloc/src/tsd.c b/skynet/3rd/jemalloc/src/tsd.c index d5fb4d6..a31f6b9 100644 --- a/skynet/3rd/jemalloc/src/tsd.c +++ b/skynet/3rd/jemalloc/src/tsd.c @@ -17,11 +17,11 @@ JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; -__thread bool JEMALLOC_TLS_MODEL tsd_initialized = false; +JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; +JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false; bool tsd_booted = false; #elif (defined(JEMALLOC_TLS)) -__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; +JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; pthread_key_t tsd_tsd; bool tsd_booted = false; #elif (defined(_WIN32)) diff --git a/skynet/3rd/jemalloc/test/unit/binshard.c b/skynet/3rd/jemalloc/test/unit/binshard.c index 406c46c..d7a8df8 100644 --- a/skynet/3rd/jemalloc/test/unit/binshard.c +++ b/skynet/3rd/jemalloc/test/unit/binshard.c @@ -82,6 +82,9 @@ thd_start(void *varg) { } TEST_BEGIN(test_bin_shard_mt) { + test_skip_if(have_percpu_arena && + PERCPU_ARENA_ENABLED(opt_percpu_arena)); + thd_t thds[NTHREADS]; unsigned i; for (i = 0; i < NTHREADS; i++) { diff --git a/skynet/3rd/jemalloc/test/unit/extent_util.c b/skynet/3rd/jemalloc/test/unit/extent_util.c new file mode 100644 index 0000000..97e55f0 --- /dev/null +++ b/skynet/3rd/jemalloc/test/unit/extent_util.c @@ -0,0 +1,269 @@ +#include "test/jemalloc_test.h" + +#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ + assert_d_eq(mallctl("experimental.utilization." node, \ + a, b, c, d), EINVAL, "Should fail when " why_inval); \ + assert_zu_eq(out_sz, out_sz_ref, \ + "Output size touched when given invalid arguments"); \ + assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content touched when given invalid arguments"); \ +} while (0) + +#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval) \ + TEST_UTIL_EINVAL("query", a, b, c, d, why_inval) +#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval) \ + TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) + +#define TEST_UTIL_VALID(node) do { \ + assert_d_eq(mallctl("experimental.utilization." 
node, \ + out, &out_sz, in, in_sz), 0, \ + "Should return 0 on correct arguments"); \ + assert_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ + assert_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content should be changed"); \ +} while (0) + +#define TEST_UTIL_BATCH_VALID TEST_UTIL_VALID("batch_query") + +#define TEST_MAX_SIZE (1 << 20) + +TEST_BEGIN(test_query) { + size_t sz; + /* + * Select some sizes that can span both small and large sizes, and are + * numerically unrelated to any size boundaries. + */ + for (sz = 7; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; + sz += (sz <= SC_SMALL_MAXCLASS ? 1009 : 99989)) { + void *p = mallocx(sz, 0); + void **in = &p; + size_t in_sz = sizeof(const void *); + size_t out_sz = sizeof(void *) + sizeof(size_t) * 5; + void *out = mallocx(out_sz, 0); + void *out_ref = mallocx(out_sz, 0); + size_t out_sz_ref = out_sz; + + assert_ptr_not_null(p, + "test pointer allocation failed"); + assert_ptr_not_null(out, + "test output allocation failed"); + assert_ptr_not_null(out_ref, + "test reference output allocation failed"); + +#define SLABCUR_READ(out) (*(void **)out) +#define COUNTS(out) ((size_t *)((void **)out + 1)) +#define NFREE_READ(out) COUNTS(out)[0] +#define NREGS_READ(out) COUNTS(out)[1] +#define SIZE_READ(out) COUNTS(out)[2] +#define BIN_NFREE_READ(out) COUNTS(out)[3] +#define BIN_NREGS_READ(out) COUNTS(out)[4] + + SLABCUR_READ(out) = NULL; + NFREE_READ(out) = NREGS_READ(out) = SIZE_READ(out) = -1; + BIN_NFREE_READ(out) = BIN_NREGS_READ(out) = -1; + memcpy(out_ref, out, out_sz); + + /* Test invalid argument(s) errors */ + TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, + "old is NULL"); + TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, + "oldlenp is NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz, + "newp is NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, + "newlen is zero"); + in_sz -= 1; + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, + "invalid newlen"); + in_sz += 1; + out_sz_ref = out_sz -= 2 * sizeof(size_t); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, + "invalid *oldlenp"); + out_sz_ref = out_sz += 2 * sizeof(size_t); + + /* Examine output for valid call */ + TEST_UTIL_VALID("query"); + assert_zu_le(sz, SIZE_READ(out), + "Extent size should be at least allocation size"); + assert_zu_eq(SIZE_READ(out) & (PAGE - 1), 0, + "Extent size should be a multiple of page size"); + if (sz <= SC_SMALL_MAXCLASS) { + assert_zu_le(NFREE_READ(out), NREGS_READ(out), + "Extent free count exceeded region count"); + assert_zu_le(NREGS_READ(out), SIZE_READ(out), + "Extent region count exceeded size"); + assert_zu_ne(NREGS_READ(out), 0, + "Extent region count must be positive"); + assert_ptr_not_null(SLABCUR_READ(out), + "Current slab is null"); + assert_true(NFREE_READ(out) == 0 + || SLABCUR_READ(out) <= p, + "Allocation should follow first fit principle"); + if (config_stats) { + assert_zu_le(BIN_NFREE_READ(out), + BIN_NREGS_READ(out), + "Bin free count exceeded region count"); + assert_zu_ne(BIN_NREGS_READ(out), 0, + "Bin region count must be positive"); + assert_zu_le(NFREE_READ(out), + BIN_NFREE_READ(out), + "Extent free count exceeded bin free count"); + assert_zu_le(NREGS_READ(out), + BIN_NREGS_READ(out), + "Extent region count exceeded " + "bin region count"); + assert_zu_eq(BIN_NREGS_READ(out) + % NREGS_READ(out), 0, + "Bin region count isn't a multiple of " + "extent region count"); + assert_zu_le( + BIN_NFREE_READ(out) - NFREE_READ(out), + BIN_NREGS_READ(out) - NREGS_READ(out), + "Free count in other extents in 
the bin " + "exceeded region count in other extents " + "in the bin"); + assert_zu_le(NREGS_READ(out) - NFREE_READ(out), + BIN_NREGS_READ(out) - BIN_NFREE_READ(out), + "Extent utilized count exceeded " + "bin utilized count"); + } + } else { + assert_zu_eq(NFREE_READ(out), 0, + "Extent free count should be zero"); + assert_zu_eq(NREGS_READ(out), 1, + "Extent region count should be one"); + assert_ptr_null(SLABCUR_READ(out), + "Current slab must be null for large size classes"); + if (config_stats) { + assert_zu_eq(BIN_NFREE_READ(out), 0, + "Bin free count must be zero for " + "large sizes"); + assert_zu_eq(BIN_NREGS_READ(out), 0, + "Bin region count must be zero for " + "large sizes"); + } + } + +#undef BIN_NREGS_READ +#undef BIN_NFREE_READ +#undef SIZE_READ +#undef NREGS_READ +#undef NFREE_READ +#undef COUNTS +#undef SLABCUR_READ + + free(out_ref); + free(out); + free(p); + } +} +TEST_END + +TEST_BEGIN(test_batch) { + size_t sz; + /* + * Select some sizes that can span both small and large sizes, and are + * numerically unrelated to any size boundaries. + */ + for (sz = 17; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; + sz += (sz <= SC_SMALL_MAXCLASS ? 1019 : 99991)) { + void *p = mallocx(sz, 0); + void *q = mallocx(sz, 0); + void *in[] = {p, q}; + size_t in_sz = sizeof(const void *) * 2; + size_t out[] = {-1, -1, -1, -1, -1, -1}; + size_t out_sz = sizeof(size_t) * 6; + size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; + size_t out_sz_ref = out_sz; + + assert_ptr_not_null(p, "test pointer allocation failed"); + assert_ptr_not_null(q, "test pointer allocation failed"); + + /* Test invalid argument(s) errors */ + TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, + "old is NULL"); + TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, + "oldlenp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz, + "newp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, + "newlen is zero"); + in_sz -= 1; + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "newlen is not an exact multiple"); + in_sz += 1; + out_sz_ref = out_sz -= 2 * sizeof(size_t); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "*oldlenp is not an exact multiple"); + out_sz_ref = out_sz += 2 * sizeof(size_t); + in_sz -= sizeof(const void *); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "*oldlenp and newlen do not match"); + in_sz += sizeof(const void *); + + /* Examine output for valid calls */ +#define TEST_EQUAL_REF(i, message) \ + assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) + +#define NFREE_READ(out, i) out[(i) * 3] +#define NREGS_READ(out, i) out[(i) * 3 + 1] +#define SIZE_READ(out, i) out[(i) * 3 + 2] + + out_sz_ref = out_sz /= 2; + in_sz /= 2; + TEST_UTIL_BATCH_VALID; + assert_zu_le(sz, SIZE_READ(out, 0), + "Extent size should be at least allocation size"); + assert_zu_eq(SIZE_READ(out, 0) & (PAGE - 1), 0, + "Extent size should be a multiple of page size"); + if (sz <= SC_SMALL_MAXCLASS) { + assert_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), + "Extent free count exceeded region count"); + assert_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), + "Extent region count exceeded size"); + assert_zu_ne(NREGS_READ(out, 0), 0, + "Extent region count must be positive"); + } else { + assert_zu_eq(NFREE_READ(out, 0), 0, + "Extent free count should be zero"); + assert_zu_eq(NREGS_READ(out, 0), 1, + "Extent region count should be one"); + } + TEST_EQUAL_REF(1, + "Should not overwrite content beyond what's needed"); + in_sz *= 2; + out_sz_ref = out_sz *= 2; + + memcpy(out_ref, out, 3 * sizeof(size_t)); + 
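Stepping outside the unit test for a moment: in application code, the batch query maps directly onto the defragmentation workflow described in the ctl.c comment above. A hypothetical, self-contained sketch of steps (2)-(5) (names are illustrative; assumes an unprefixed build exposing mallctl):

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <jemalloc/jemalloc.h>

	/* Flag any pointer whose extent is less than half utilized. */
	static void report_fragmentation(const void *ptrs[], size_t n) {
		size_t out_sz = n * 3 * sizeof(size_t);
		size_t *out = malloc(out_sz);
		if (out == NULL || mallctl(
		    "experimental.utilization.batch_query", out, &out_sz,
		    (void *)ptrs, n * sizeof(const void *)) != 0) {
			free(out);
			return;
		}
		for (size_t i = 0; i < n; i++) {
			size_t nfree = out[i * 3], nregs = out[i * 3 + 1];
			bool fragmented = nregs > 0 && nfree * 2 > nregs;
			printf("ptr[%zu]: %zu/%zu regions free%s\n", i,
			    nfree, nregs, fragmented ?
			    " (defragmentation candidate)" : "");
		}
		free(out);
	}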
TEST_UTIL_BATCH_VALID; + TEST_EQUAL_REF(0, "Statistics should be stable across calls"); + if (sz <= SC_SMALL_MAXCLASS) { + assert_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), + "Extent free count exceeded region count"); + } else { + assert_zu_eq(NFREE_READ(out, 0), 0, + "Extent free count should be zero"); + } + assert_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), + "Extent region count should be same for same region size"); + assert_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), + "Extent size should be same for same region size"); + +#undef SIZE_READ +#undef NREGS_READ +#undef NFREE_READ + +#undef TEST_EQUAL_REF + + free(q); + free(p); + } +} +TEST_END + +int +main(void) { + assert_zu_lt(SC_SMALL_MAXCLASS, TEST_MAX_SIZE, + "Test case cannot cover large classes"); + return test(test_query, test_batch); +} diff --git a/skynet/3rd/jemalloc/test/unit/mallctl.c b/skynet/3rd/jemalloc/test/unit/mallctl.c index 498f9e0..3a75ac0 100644 --- a/skynet/3rd/jemalloc/test/unit/mallctl.c +++ b/skynet/3rd/jemalloc/test/unit/mallctl.c @@ -159,6 +159,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); + TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); diff --git a/skynet/3rd/jemalloc/test/unit/prof_log.c b/skynet/3rd/jemalloc/test/unit/prof_log.c index 6a3464b..92fbd7c 100644 --- a/skynet/3rd/jemalloc/test/unit/prof_log.c +++ b/skynet/3rd/jemalloc/test/unit/prof_log.c @@ -125,12 +125,14 @@ TEST_BEGIN(test_prof_log_many_traces) { assert_rep(); } /* - * There should be 8 total backtraces: two for malloc/free in f1(), - * two for malloc/free in f2(), two for malloc/free in f3(), and then - * two for malloc/free in f1()'s call to f3(). + * There should be 8 total backtraces: two for malloc/free in f1(), two + * for malloc/free in f2(), two for malloc/free in f3(), and then two + * for malloc/free in f1()'s call to f3(). However compiler + * optimizations such as loop unrolling might generate more call sites. + * So >= 8 traces are expected. */ - assert_zu_eq(prof_log_bt_count(), 8, - "Wrong number of backtraces given sample workload"); + assert_zu_ge(prof_log_bt_count(), 8, + "Expect at least 8 backtraces given sample workload"); assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); } diff --git a/skynet/3rd/jemalloc/test/unit/safety_check.c b/skynet/3rd/jemalloc/test/unit/safety_check.c new file mode 100644 index 0000000..bf4bd86 --- /dev/null +++ b/skynet/3rd/jemalloc/test/unit/safety_check.c @@ -0,0 +1,156 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/safety_check.h" + +/* + * Note that we get called through safety_check.sh, which turns on sampling for + * everything. + */ + +bool fake_abort_called; +void fake_abort(const char *message) { + (void)message; + fake_abort_called = true; +} + +TEST_BEGIN(test_malloc_free_overflow) { + test_skip_if(!config_prof); + test_skip_if(!config_opt_safety_checks); + + safety_check_set_abort(&fake_abort); + /* Buffer overflow! 
+	char* ptr = malloc(128);
+	ptr[128] = 0;
+	free(ptr);
+	safety_check_set_abort(NULL);
+
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+}
+TEST_END
+
+TEST_BEGIN(test_mallocx_dallocx_overflow) {
+	test_skip_if(!config_prof);
+	test_skip_if(!config_opt_safety_checks);
+
+	safety_check_set_abort(&fake_abort);
+	/* Buffer overflow! */
+	char* ptr = mallocx(128, 0);
+	ptr[128] = 0;
+	dallocx(ptr, 0);
+	safety_check_set_abort(NULL);
+
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+}
+TEST_END
+
+TEST_BEGIN(test_malloc_sdallocx_overflow) {
+	test_skip_if(!config_prof);
+	test_skip_if(!config_opt_safety_checks);
+
+	safety_check_set_abort(&fake_abort);
+	/* Buffer overflow! */
+	char* ptr = malloc(128);
+	ptr[128] = 0;
+	sdallocx(ptr, 128, 0);
+	safety_check_set_abort(NULL);
+
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+}
+TEST_END
+
+TEST_BEGIN(test_realloc_overflow) {
+	test_skip_if(!config_prof);
+	test_skip_if(!config_opt_safety_checks);
+
+	safety_check_set_abort(&fake_abort);
+	/* Buffer overflow! */
+	char* ptr = malloc(128);
+	ptr[128] = 0;
+	ptr = realloc(ptr, 129);
+	safety_check_set_abort(NULL);
+	free(ptr);
+
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+}
+TEST_END
+
+TEST_BEGIN(test_rallocx_overflow) {
+	test_skip_if(!config_prof);
+	test_skip_if(!config_opt_safety_checks);
+
+	safety_check_set_abort(&fake_abort);
+	/* Buffer overflow! */
+	char* ptr = malloc(128);
+	ptr[128] = 0;
+	ptr = rallocx(ptr, 129, 0);
+	safety_check_set_abort(NULL);
+	free(ptr);
+
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+}
+TEST_END
+
+TEST_BEGIN(test_xallocx_overflow) {
+	test_skip_if(!config_prof);
+	test_skip_if(!config_opt_safety_checks);
+
+	safety_check_set_abort(&fake_abort);
+	/* Buffer overflow! */
+	char* ptr = malloc(128);
+	ptr[128] = 0;
+	size_t result = xallocx(ptr, 129, 0, 0);
+	assert_zu_eq(result, 128, "xallocx should not expand the allocation");
+	free(ptr);
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+	safety_check_set_abort(NULL);
+}
+TEST_END
+
+TEST_BEGIN(test_realloc_no_overflow) {
+	char* ptr = malloc(128);
+	ptr = realloc(ptr, 256);
+	ptr[128] = 0;
+	ptr[255] = 0;
+	free(ptr);
+
+	ptr = malloc(128);
+	ptr = realloc(ptr, 64);
+	ptr[63] = 0;
+	ptr[0] = 0;
+	free(ptr);
+}
+TEST_END
+
+TEST_BEGIN(test_rallocx_no_overflow) {
+	char* ptr = malloc(128);
+	ptr = rallocx(ptr, 256, 0);
+	ptr[128] = 0;
+	ptr[255] = 0;
+	free(ptr);
+
+	ptr = malloc(128);
+	ptr = rallocx(ptr, 64, 0);
+	ptr[63] = 0;
+	ptr[0] = 0;
+	free(ptr);
+}
+TEST_END
+
+int
+main(void) {
+	return test(
+	    test_malloc_free_overflow,
+	    test_mallocx_dallocx_overflow,
+	    test_malloc_sdallocx_overflow,
+	    test_realloc_overflow,
+	    test_rallocx_overflow,
+	    test_xallocx_overflow,
+	    test_realloc_no_overflow,
+	    test_rallocx_no_overflow);
+}
diff --git a/skynet/3rd/jemalloc/test/unit/safety_check.sh b/skynet/3rd/jemalloc/test/unit/safety_check.sh
new file mode 100644
index 0000000..8fcc7d8
--- /dev/null
+++ b/skynet/3rd/jemalloc/test/unit/safety_check.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_prof}" = "x1" ] ; then
+  export MALLOC_CONF="prof:true,lg_prof_sample:0"
+fi
diff --git a/skynet/3rd/jemalloc/test/unit/stats.c b/skynet/3rd/jemalloc/test/unit/stats.c
index 4323bfa..646768e 100644
--- a/skynet/3rd/jemalloc/test/unit/stats.c
+++ b/skynet/3rd/jemalloc/test/unit/stats.c
@@ -228,7 +228,7 @@ gen_mallctl_str(char *cmd, char *name, unsigned arena_ind) {
 
 TEST_BEGIN(test_stats_arenas_bins) {
 	void *p;
-	size_t sz, curslabs, curregs;
+	size_t sz, curslabs, curregs, nonfull_slabs;
 	uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes;
 	uint64_t nslabs, nreslabs;
 	int expected = config_stats ? 0 : ENOENT;
@@ -289,6 +289,9 @@ TEST_BEGIN(test_stats_arenas_bins) {
 	gen_mallctl_str(cmd, "curslabs", arena_ind);
 	assert_d_eq(mallctl(cmd, (void *)&curslabs, &sz, NULL, 0), expected,
 	    "Unexpected mallctl() result");
+	gen_mallctl_str(cmd, "nonfull_slabs", arena_ind);
+	assert_d_eq(mallctl(cmd, (void *)&nonfull_slabs, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 
 	if (config_stats) {
 		assert_u64_gt(nmalloc, 0,
@@ -309,6 +312,8 @@ TEST_BEGIN(test_stats_arenas_bins) {
 		    "At least one slab should have been allocated");
 		assert_zu_gt(curslabs, 0,
 		    "At least one slab should be currently allocated");
+		assert_zu_eq(nonfull_slabs, 0,
+		    "slabs_nonfull should be empty");
 	}
 
 	dallocx(p, 0);
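
For reference, test_batch above drives the batch utilization query entirely through test macros, which hides the shape of the underlying call. The following standalone sketch shows how a client might issue the same query. It is illustrative only: the mallctl name "experimental.utilization.batch_query" and the per-pointer (nfree, nregs, extent size) output layout are assumptions inferred from the NFREE_READ/NREGS_READ/SIZE_READ macros, not something stated by this diff.

/*
 * Illustrative sketch only -- not part of this patch. Assumes the
 * "experimental.utilization.batch_query" mallctl and one
 * (nfree, nregs, extent size) triple of size_t per input pointer.
 */
#include <stdio.h>
#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	void *ptrs[2] = {malloc(17), malloc(4096)};
	size_t util[2 * 3];	/* one triple per pointer */
	size_t util_sz = sizeof(util);

	/* newp/newlen carry the pointer array; oldp/oldlenp receive triples. */
	if (mallctl("experimental.utilization.batch_query", util, &util_sz,
	    ptrs, sizeof(ptrs)) != 0) {
		fprintf(stderr, "batch query not supported in this build\n");
		return 1;
	}
	for (int i = 0; i < 2; i++) {
		printf("ptr[%d]: nfree=%zu nregs=%zu extent_size=%zu\n", i,
		    util[i * 3], util[i * 3 + 1], util[i * 3 + 2]);
	}
	free(ptrs[0]);
	free(ptrs[1]);
	return 0;
}

As the EINVAL cases in test_batch show, such a call fails unless newlen is an exact multiple of the pointer size and *oldlenp an exact multiple of the triple size, with the two counts matching.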