diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 7668179b0..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: 2 - -jobs: - build: - docker: - - image: circleci/python:3.6.2 - steps: - - checkout - - run: - command: ./continuous_integration/build_doc.sh - environment: - MINICONDA_PATH: ~/miniconda - CONDA_ENV_NAME: testenv - - store_artifacts: - path: doc/_build/html - destination: doc diff --git a/.readthedocs-requirements.txt b/.readthedocs-requirements.txt index 3699035ec..05dd31dee 100644 --- a/.readthedocs-requirements.txt +++ b/.readthedocs-requirements.txt @@ -1,4 +1,5 @@ sphinx +docutils<0.18 numpy matplotlib pillow diff --git a/CHANGES.rst b/CHANGES.rst index 5328c2c79..d20a7a388 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,17 +1,78 @@ Latest changes ============== -1.0.1 ------ +Release 1.2.0 +------------- + +- Fix a security issue where ``eval(pre_dispatch)`` could potentially run + arbitrary code. Now only basic numerics are supported. + https://github.com/joblib/joblib/pull/1327 + +- Make sure that joblib works even when multiprocessing is not available, + for instance with Pyodide + https://github.com/joblib/joblib/pull/1256 + +- Avoid unnecessary warnings when workers and main process delete + the temporary memmap folder contents concurrently. + https://github.com/joblib/joblib/pull/1263 + +- Vendor loky 3.1.0 with several fixes to more robustly forcibly terminate + worker processes in case of a crash. + https://github.com/joblib/joblib/pull/1269 + +- Fix memory alignment bug for pickles containing numpy arrays. + This is especially important when loading the pickle with + ``mmap_mode != None`` as the resulting ``numpy.memmap`` object + would not be able to correct the misalignment without performing + a memory copy. + This bug would cause invalid computation and segmentation faults + with native code that would directly access the underlying data + buffer of a numpy array, for instance C/C++/Cython code compiled + with older GCC versions or some old OpenBLAS written in platform + specific assembly. + https://github.com/joblib/joblib/pull/1254 + +- Vendor cloudpickle 2.2.0 which adds support for PyPy 3.8+. + +- Vendor loky 3.3.0 which fixes a bug with leaking processes in case of + nested loky parallel calls and more reliability spawn the correct + number of reusable workers. + +Release 1.1.0 +-------------- + +- Fix byte order inconsistency issue during deserialization using joblib.load + in cross-endian environment: the numpy arrays are now always loaded to + use the system byte order, independently of the byte order of the system + that serialized the pickle. + https://github.com/joblib/joblib/pull/1181 + +- Fix joblib.Memory bug with the ``ignore`` parameter when the cached function + is a decorated function. + https://github.com/joblib/joblib/pull/1165 +- Fix `joblib.Memory` to properly handle caching for functions defined + interactively in a IPython session or in Jupyter notebook cell. + https://github.com/joblib/joblib/pull/1214 + +- Update vendored loky (from version 2.9 to 3.0) and cloudpickle (from + version 1.6 to 2.0) + https://github.com/joblib/joblib/pull/1218 + +Release 1.0.1 +------------- + +- Add check_call_in_cache method to check cache without calling function. 
+ https://github.com/joblib/joblib/pull/820 + - dask: avoid redundant scattering of large arguments to make a more efficient use of the network resources and avoid crashing dask with "OSError: [Errno 55] No buffer space available" or "ConnectionResetError: [Errno 104] connection reset by peer". https://github.com/joblib/joblib/pull/1133 -1.0.0 ------ +Release 1.0.0 +------------- - Make `joblib.hash` and `joblib.Memory` caching system compatible with `numpy >= 1.20.0`. Also make it explicit in the documentation that users should now @@ -245,7 +306,7 @@ Maxime Weyl Maxime Weyl Loading a corrupted cached file with mmap mode enabled would - recompute the results and return them without memmory mapping. + recompute the results and return them without memory mapping. Release 0.12.3 @@ -329,8 +390,8 @@ Thomas Moreau Implement the ``'loky'`` backend with @ogrisel. This backend relies on a robust implementation of ``concurrent.futures.ProcessPoolExecutor`` - with spawned processes that can be reused accross the ``Parallel`` - calls. This fixes the bad interation with third paty libraries relying on + with spawned processes that can be reused across the ``Parallel`` + calls. This fixes the bad integration with third paty libraries relying on thread pools, described in https://pythonhosted.org/joblib/parallel.html#bad-interaction-of-multiprocessing-and-third-party-libraries Limit the number of threads used in worker processes by C-libraries that @@ -390,7 +451,7 @@ Alexandre Abadie Add ``register_compressor`` function for extending available compressors. - Allow passing a string to ``compress`` parameter in ``dump`` funtion. This + Allow passing a string to ``compress`` parameter in ``dump`` function. This string should correspond to the compressor used (e.g. zlib, gzip, lz4, etc). The default compression level is used in this case. @@ -440,7 +501,7 @@ Loïc Estève Loïc Estève Fix handling of memmap objects with offsets greater than - mmap.ALLOCATIONGRANULARITY in ``joblib.Parrallel``. See + mmap.ALLOCATIONGRANULARITY in ``joblib.Parallel``. See https://github.com/joblib/joblib/issues/451 for more details. Loïc Estève @@ -856,7 +917,7 @@ Release 0.6.5 2012-09-15 Yannick Schwartz - BUG: make sure that sets and dictionnaries give reproducible hashes + BUG: make sure that sets and dictionaries give reproducible hashes 2012-07-18 @@ -887,7 +948,7 @@ GaelVaroquaux BUG: non-reproducible hashing: order of kwargs - The ordering of a dictionnary is random. As a result the function hashing + The ordering of a dictionary is random. As a result the function hashing was not reproducible. Pretty hard to test Release 0.6.3 @@ -1039,7 +1100,7 @@ Release 0.5.3 2011-06-25 Gael varoquaux - API: All the usefull symbols in the __init__ + API: All the useful symbols in the __init__ Release 0.5.2 @@ -1197,7 +1258,7 @@ Gael varoquaux Gael varoquaux 2010-07-29 - MISC: Silence tests (and hopefuly Yaroslav :P) + MISC: Silence tests (and hopefully Yaroslav :P) Release 0.4.3 ---------------- diff --git a/LICENSE.txt b/LICENSE.txt index 0f469af82..910537bd3 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2008-2016, The joblib developers. +Copyright (c) 2008-2021, The joblib developers. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.rst b/README.rst index f1aac980c..f9defa1db 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -|PyPi| |Azure| |Codecov| +|PyPi| |Azure| |ReadTheDocs| |Codecov| .. 
|PyPi| image:: https://badge.fury.io/py/joblib.svg :target: https://badge.fury.io/py/joblib @@ -6,7 +6,11 @@ .. |Azure| image:: https://dev.azure.com/joblib/joblib/_apis/build/status/joblib.joblib?branchName=master :target: https://dev.azure.com/joblib/joblib/_build?definitionId=3&_a=summary&branchFilter=40 - :alt: Codecov coverage + :alt: Azure CI status + +.. |ReadTheDocs| image:: https://readthedocs.org/projects/joblib/badge/?version=latest + :target: https://joblib.readthedocs.io/en/latest/?badge=latest + :alt: Documentation Status .. |Codecov| image:: https://codecov.io/gh/joblib/joblib/branch/master/graph/badge.svg :target: https://codecov.io/gh/joblib/joblib @@ -44,7 +48,7 @@ Dependencies ============ - Joblib has no mandatory dependencies besides Python (supported versions are - 2.7+ and 3.4+). + 3.7+). - Joblib has an optional dependency on Numpy (at least version 1.6.1) for array manipulation. - Joblib includes its own vendored copy of @@ -130,40 +134,3 @@ but, the following git command may be used to generate the lines:: git log --abbrev-commit --date=short --no-merges --sparse -Licensing ---------- - -joblib is **BSD-licenced** (3 clause): - - This software is OSI Certified Open Source Software. - OSI Certified is a certification mark of the Open Source Initiative. - - Copyright (c) 2009-2011, joblib developpers - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - * Neither the name of Gael Varoquaux. nor the names of other joblib - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - **This software is provided by the copyright holders and contributors - "as is" and any express or implied warranties, including, but not - limited to, the implied warranties of merchantability and fitness for - a particular purpose are disclaimed. In no event shall the copyright - owner or contributors be liable for any direct, indirect, incidental, - special, exemplary, or consequential damages (including, but not - limited to, procurement of substitute goods or services; loss of use, - data, or profits; or business interruption) however caused and on any - theory of liability, whether in contract, strict liability, or tort - (including negligence or otherwise) arising in any way out of the use - of this software, even if advised of the possibility of such - damage.** diff --git a/TODO.rst b/TODO.rst index 83571634e..0028cc37b 100644 --- a/TODO.rst +++ b/TODO.rst @@ -40,7 +40,7 @@ Tasks at hand on joblib, in increasing order of difficulty. * add a 'argument_hash' keyword argument to Memory.cache, to be able to replace the hashing logic of memory for the input arguments. It should - accept as an input the dictionnary of arguments, as returned in + accept as an input the dictionary of arguments, as returned in func_inspect, and return a string. * add a sqlite db for provenance tracking. 
Store computation time and usage diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 845f3e300..3b58a4d3c 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -3,6 +3,13 @@ # Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: # https://docs.microsoft.com/azure/devops/pipelines/languages/python +schedules: +- cron: "0 9 * * *" + displayName: Daily build + branches: + include: + - master + trigger: - master @@ -38,49 +45,48 @@ jobs: PYTHON_VERSION: "pypy3" LOKY_MAX_CPU_COUNT: "2" - linux_py38_distributed: + linux_py39_sklearn_tests: + imageName: 'ubuntu-latest' + PYTHON_VERSION: "3.9" + # SKIP_TESTS: "true" + SKLEARN_TESTS: "true" + linux_py310_distributed: # To be updated regularly to use the most recent versions of the # dependencies. imageName: 'ubuntu-latest' - PYTHON_VERSION: "3.8" - EXTRA_CONDA_PACKAGES: "numpy=1.18 distributed=2.17" - linux_py37_sklearn_tests: - imageName: 'ubuntu-latest' - PYTHON_VERSION: "3.7" - EXTRA_CONDA_PACKAGES: "numpy=1.16" - SKIP_TESTS: "true" - SKLEARN_TESTS: "true" + PYTHON_VERSION: "3.10" + EXTRA_CONDA_PACKAGES: "numpy=1.23 distributed=2022.2.0" linux_py37_distributed: imageName: 'ubuntu-latest' PYTHON_VERSION: "3.7" EXTRA_CONDA_PACKAGES: "numpy=1.15 distributed=2.13" - linux_py36_cython: + linux_py310_cython: imageName: 'ubuntu-latest' - PYTHON_VERSION: "3.6" - EXTRA_CONDA_PACKAGES: "numpy=1.14" + PYTHON_VERSION: "3.10" + EXTRA_CONDA_PACKAGES: "numpy=1.23" CYTHON: "true" - linux_py36_no_multiprocessing_no_lzma: + linux_py37_no_multiprocessing_no_lzma: imageName: 'ubuntu-latest' - PYTHON_VERSION: "3.6" - EXTRA_CONDA_PACKAGES: "numpy=1.14" + PYTHON_VERSION: "3.7" + EXTRA_CONDA_PACKAGES: "numpy=1.15" JOBLIB_MULTIPROCESSING: "0" NO_LZMA: "1" - linux_py36_no_numpy: + linux_py37_no_numpy: imageName: 'ubuntu-latest' - PYTHON_VERSION: "3.6" - - windows_py38: - imageName: "vs2017-win2016" - PYTHON_VERSION: "3.8" - EXTRA_CONDA_PACKAGES: "numpy=1.18" - - macos_py38: - imageName: "macos-10.14" - PYTHON_VERSION: "3.8" - EXTRA_CONDA_PACKAGES: "numpy=1.18" - macos_py36_no_numpy: - imageName: "macos-10.14" - PYTHON_VERSION: "3.6" + PYTHON_VERSION: "3.7" + + windows_py310: + imageName: "windows-latest" + PYTHON_VERSION: "3.10" + EXTRA_CONDA_PACKAGES: "numpy=1.23" + + macos_py310: + imageName: "macos-latest" + PYTHON_VERSION: "3.10" + EXTRA_CONDA_PACKAGES: "numpy=1.23" + macos_py37_no_numpy: + imageName: "macos-latest" + PYTHON_VERSION: "3.7" variables: JUNITXML: 'test-data.xml' diff --git a/benchmarks/bench_pickle.py b/benchmarks/bench_pickle.py index 98f0bff26..7de096ef5 100755 --- a/benchmarks/bench_pickle.py +++ b/benchmarks/bench_pickle.py @@ -1,7 +1,7 @@ """ Benching joblib pickle I/O. -Warning: this is slow, and the benchs are easily offset by other disk +Warning: this is slow, and the benches are easily offset by other disk activity. 
""" import os diff --git a/conftest.py b/conftest.py index e246e951f..875e9b9b9 100644 --- a/conftest.py +++ b/conftest.py @@ -1,10 +1,10 @@ -from distutils.version import LooseVersion import pytest from _pytest.doctest import DoctestItem import logging from joblib.parallel import mp +from joblib.backports import LooseVersion try: import lz4 except ImportError: diff --git a/continuous_integration/build_doc.sh b/continuous_integration/build_doc.sh deleted file mode 100755 index 0c1e6e206..000000000 --- a/continuous_integration/build_doc.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -set -x -set -e - -# deactivate circleci virtualenv and setup a miniconda env instead -if [[ `type -t deactivate` ]]; then - deactivate -fi - -wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ - -O miniconda.sh -chmod +x miniconda.sh && ./miniconda.sh -b -p $MINICONDA_PATH -export PATH="$MINICONDA_PATH/bin:$PATH" -conda update --yes --quiet conda - -conda create -n $CONDA_ENV_NAME --yes --quiet python=3 -source activate $CONDA_ENV_NAME - -conda install --yes --quiet pip numpy sphinx matplotlib pillow dask distributed -pip install sphinx-gallery numpydoc lz4 - -python setup.py develop - -make doc 2>&1 diff --git a/continuous_integration/install.sh b/continuous_integration/install.sh index d191e1467..41e21e77c 100755 --- a/continuous_integration/install.sh +++ b/continuous_integration/install.sh @@ -13,17 +13,17 @@ set -e create_new_conda_env() { conda update --yes conda TO_INSTALL="python=$PYTHON_VERSION pip pytest $EXTRA_CONDA_PACKAGES" - conda create -n testenv --yes $TO_INSTALL + conda create -n testenv --yes -c conda-forge $TO_INSTALL source activate testenv } create_new_pypy3_env() { - PYPY_FOLDER="pypy3.6-v7.3.1-linux64" + PYPY_FOLDER="pypy3.7-v7.3.7-linux64" wget https://downloads.python.org/pypy/$PYPY_FOLDER.tar.bz2 tar xvf $PYPY_FOLDER.tar.bz2 $PYPY_FOLDER/bin/pypy3 -m venv pypy3 source pypy3/bin/activate - pip install -U pip pytest + pip install -U pip 'pytest' } if [[ "$PYTHON_VERSION" == "pypy3" ]]; then @@ -47,10 +47,7 @@ if [ -n "$NUMPY_VERSION" ]; then fi if [[ "$COVERAGE" == "true" ]]; then - # TODO: unpin when https://github.com/nedbat/coveragepy/issues/883 is fixed - # Weird issues with recent version of coverage: unpin when not causing - # pytest to raise INTERNALERROR exceptions. - PIP_INSTALL_PACKAGES="$PIP_INSTALL_PACKAGES coverage==4.5.4 pytest-cov codecov" + PIP_INSTALL_PACKAGES="$PIP_INSTALL_PACKAGES coverage pytest-cov codecov" fi if [[ "pypy3" != *"$PYTHON_VERSION"* ]]; then diff --git a/continuous_integration/run_tests.sh b/continuous_integration/run_tests.sh index 51bed5fb0..68d233fdc 100755 --- a/continuous_integration/run_tests.sh +++ b/continuous_integration/run_tests.sh @@ -20,27 +20,46 @@ if [[ "$SKIP_TESTS" != "true" ]]; then export PYTEST_ADDOPTS="--cov=joblib --cov-append" fi - pytest joblib -vl --timeout=60 --junitxml="${JUNITXML}" + pytest joblib -vl --timeout=120 --junitxml="${JUNITXML}" make test-doc fi if [[ "$SKLEARN_TESTS" == "true" ]]; then - # Install scikit-learn from conda and test against the installed + # Install the nightly build of scikit-learn and test against the installed # development version of joblib. - conda remove -y numpy - conda install -y -c conda-forge cython pillow scikit-learn + # TODO: unpin pip once either https://github.com/pypa/pip/issues/10825 + # accepts invalid HTML or Anaconda is fixed. 
+ conda install -y -c conda-forge cython pillow numpy scipy "pip<22" + pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn python -c "import sklearn; print('Testing scikit-learn', sklearn.__version__)" # Move to a dedicated folder to avoid being polluted by joblib specific conftest.py # and disable the doctest plugin to avoid issues with doctests in scikit-learn # docstrings that require setting print_changed_only=True temporarily. - cd "/tmp" - pytest -vl --maxfail=5 -p no:doctest -k "not test_import_is_deprecated" --pyargs sklearn + NEW_TEST_DIR=$(mktemp -d) + cd $NEW_TEST_DIR + + pytest -vl --maxfail=5 -p no:doctest \ + -k "not test_import_is_deprecated" \ + -k "not test_check_memory" \ + --pyargs sklearn + + # Justification for skipping some tests: + # + # test_import_is_deprecated: Don't worry about deprecated imports: this is + # tested for real in upstream scikit-learn and this is not joblib's + # responsibility. Let's skip this test to avoid false positives in joblib's + # CI. + # + # test_check_memory: scikit-learn test need to be updated to avoid using + # cachedir: https://github.com/scikit-learn/scikit-learn/pull/22365 fi if [[ "$SKIP_TESTS" != "true" && "$COVERAGE" == "true" ]]; then echo "Deleting empty coverage files:" - find . -name ".coverage.*" -size 0 -print -delete + # the "|| echo" is to avoid having 0 return states that terminate the + # script when the find uncounters permission denied + find . -name ".coverage.*" -size 0 -print -delete || echo echo "Combining .coverage.* files..." coverage combine --append || echo "Found invalid coverage files." echo "Generating XML Coverage report..." diff --git a/doc/conf.py b/doc/conf.py index fa9e9289c..ae75b69d1 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -69,7 +69,7 @@ # General information about the project. project = 'joblib' -copyright = '2008-2018, Joblib developers' +copyright = '2008-2021, Joblib developers' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -253,7 +253,7 @@ shutil.copyfile('../README.rst', 'README.rst') except IOError: pass - # This fails during the tesing, as the code is ran in a different + # This fails during the testing, as the code is ran in a different # directory numpydoc_show_class_members = False diff --git a/doc/memory.rst b/doc/memory.rst index 0b9fbaef7..d539f5f61 100644 --- a/doc/memory.rst +++ b/doc/memory.rst @@ -145,7 +145,7 @@ arrays:: >>> cachedir2 = 'your_cachedir2_location' >>> memory2 = Memory(cachedir2, mmap_mode='r') >>> square = memory2.cache(np.square) - >>> a = np.vander(np.arange(3)).astype(np.float) + >>> a = np.vander(np.arange(3)).astype(float) >>> square(a) ________________________________________________________________________________ [Memory] Calling square... @@ -391,8 +391,8 @@ Gotchas ``joblib.Memory`` cache can get invalidated when upgrading ``joblib``. Invalidation can also happen when upgrading a third party library (such as ``numpy``): in such a case, only the cached function calls with parameters - that are constructs (or contain references to contructs) defined in the - upgraded library should potentially be invalidated after the uprade. + that are constructs (or contain references to constructs) defined in the + upgraded library should potentially be invalidated after the upgrade. 
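A minimal sketch of the cache-exploration workflow touched by these ``doc/memory.rst`` hunks, combining ``mmap_mode`` with the ``check_call_in_cache`` method added in the changelog above (the cache path below is only an illustrative assumption, not part of the patch)::

    import numpy as np
    from joblib import Memory

    # Illustrative cache location; any writable directory works.
    memory = Memory('/tmp/joblib_cache_demo', mmap_mode='r', verbose=0)
    square = memory.cache(np.square)

    a = np.vander(np.arange(3)).astype(float)
    square(a)                                # computes and stores the result

    # check_call_in_cache only inspects the store; it never triggers a call.
    assert square.check_call_in_cache(a)
    assert not square.check_call_in_cache(a + 1)

Because the check never executes the wrapped function, it stays cheap even when a cache miss would mean an expensive recomputation.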
Ignoring some arguments @@ -427,7 +427,7 @@ objects that, in addition of behaving like normal functions, expose methods useful for cache exploration and management. .. autoclass:: MemorizedFunc - :members: __init__, call, clear + :members: __init__, call, clear, check_call_in_cache .. diff --git a/doc/parallel.rst b/doc/parallel.rst index 466d613af..c4d3dd35a 100644 --- a/doc/parallel.rst +++ b/doc/parallel.rst @@ -69,7 +69,14 @@ In prior versions, the same effect could be achieved by hardcoding a specific backend implementation such as ``backend="threading"`` in the call to :class:`joblib.Parallel` but this is now considered a bad pattern (when done in a library) as it does not make it possible to override that -choice with the ``parallel_backend`` context manager. +choice with the :func:`~joblib.parallel_backend` context manager. + + +.. topic:: The loky backend may not always be available + + Some rare systems do not support multiprocessing (for instance + Pyodide). In this case the loky backend is not available and the + default backend falls back to threading. Besides builtin joblib backends, we can use `Joblib Apache Spark Backend `_ @@ -192,7 +199,7 @@ libraries: Since joblib 0.14, it is also possible to programmatically override the default number of threads using the ``inner_max_num_threads`` argument of the -``parallel_backend`` function as follows: +:func:`~joblib.parallel_backend` function as follows: .. code-block:: python diff --git a/examples/compressors_comparison.py b/examples/compressors_comparison.py index 64ebcf58d..3b20b10e5 100644 --- a/examples/compressors_comparison.py +++ b/examples/compressors_comparison.py @@ -8,7 +8,7 @@ GZip compression methods. For each compared compression method, this example dumps and reloads a dataset fetched from an online machine-learning database. This gives 3 -informations: the size on disk of the compressed data, the time spent to dump +information: the size on disk of the compressed data, the time spent to dump and the time spent to reload the data from disk. """ diff --git a/examples/serialization_and_wrappers.py b/examples/serialization_and_wrappers.py index 8e7ccf9eb..d03f0123e 100644 --- a/examples/serialization_and_wrappers.py +++ b/examples/serialization_and_wrappers.py @@ -36,7 +36,7 @@ def func_async(i, *args): ############################################################################### -# For most use-cases, using ``cloudpickle``` is efficient enough. However, this +# For most use-cases, using ``cloudpickle`` is efficient enough. However, this # solution can be very slow to serialize large python objects, such as dict or # list, compared to the standard ``pickle`` serialization. # @@ -78,7 +78,7 @@ def func_async(i, *args): # POSIX specification and can have bad interaction with compiled extensions # that use ``openmp``. Also, it is not possible to start processes with # ``fork`` on windows where only ``spawn`` is available. The ``loky`` backend -# has been developped to mitigate these issues. +# has been developed to mitigate these issues. 
# # To have fast pickling with ``loky``, it is possible to rely on ``pickle`` to # serialize all communications between the main process and the workers with diff --git a/joblib/__init__.py b/joblib/__init__.py index 9594f4c52..cb124c452 100644 --- a/joblib/__init__.py +++ b/joblib/__init__.py @@ -59,7 +59,7 @@ >>> cachedir = 'your_cache_dir_goes_here' >>> mem = Memory(cachedir) >>> import numpy as np - >>> a = np.vander(np.arange(3)).astype(np.float) + >>> a = np.vander(np.arange(3)).astype(float) >>> square = mem.cache(np.square) >>> b = square(a) # doctest: +ELLIPSIS ________________________________________________________________________________ @@ -106,7 +106,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = '1.0.1' +__version__ = '1.2.0' import os @@ -123,8 +123,7 @@ from .parallel import register_parallel_backend from .parallel import parallel_backend from .parallel import effective_n_jobs - -from .externals.loky import wrap_non_picklable_objects +from ._cloudpickle_wrapper import wrap_non_picklable_objects __all__ = ['Memory', 'MemorizedResult', 'PrintTime', 'Logger', 'hash', 'dump', diff --git a/joblib/_cloudpickle_wrapper.py b/joblib/_cloudpickle_wrapper.py new file mode 100644 index 000000000..3dbe3ae71 --- /dev/null +++ b/joblib/_cloudpickle_wrapper.py @@ -0,0 +1,17 @@ +""" +Small shim of loky's cloudpickle_wrapper to avoid failure when +multiprocessing is not available. +""" + + +from ._multiprocessing_helpers import mp + + +def my_wrap_non_picklable_objects(obj, keep_wrapper=True): + return obj + + +if mp is None: + wrap_non_picklable_objects = my_wrap_non_picklable_objects +else: + from .externals.loky import wrap_non_picklable_objects # noqa diff --git a/joblib/_dask.py b/joblib/_dask.py index 009ddc6b1..57e247e91 100644 --- a/joblib/_dask.py +++ b/joblib/_dask.py @@ -12,11 +12,13 @@ from .parallel import parallel_backend try: + import dask import distributed except ImportError: + dask = None distributed = None -if distributed is not None: +if dask is not None and distributed is not None: from dask.utils import funcname, itemgetter from dask.sizeof import sizeof from dask.distributed import ( @@ -24,10 +26,12 @@ as_completed, get_client, secede, - rejoin + rejoin, + get_worker ) from distributed.utils import thread_state + try: # asyncio.TimeoutError, Python3-only error thrown by recent versions of # distributed @@ -51,7 +55,7 @@ class _WeakKeyDictionary: such as large numpy arrays or pandas dataframes that are not hashable and therefore cannot be used as keys of traditional python dicts. - Futhermore using a dict with id(array) as key is not safe because the + Furthermore using a dict with id(array) as key is not safe because the Python is likely to reuse id of recently collected arrays. """ diff --git a/joblib/_memmapping_reducer.py b/joblib/_memmapping_reducer.py index d58382222..9d350c032 100644 --- a/joblib/_memmapping_reducer.py +++ b/joblib/_memmapping_reducer.py @@ -99,6 +99,10 @@ def unlink_file(filename): raise else: time.sleep(.2) + except FileNotFoundError: + # In case of a race condition when deleting the temporary folder, + # avoid noisy FileNotFoundError exception in the resource tracker. 
+ pass resource_tracker._CLEANUP_FUNCS['file'] = unlink_file diff --git a/joblib/_multiprocessing_helpers.py b/joblib/_multiprocessing_helpers.py index 1c5de2f8b..bde4bc190 100644 --- a/joblib/_multiprocessing_helpers.py +++ b/joblib/_multiprocessing_helpers.py @@ -14,6 +14,7 @@ if mp: try: import multiprocessing as mp + import _multiprocessing # noqa except ImportError: mp = None diff --git a/joblib/_parallel_backends.py b/joblib/_parallel_backends.py index 42645285d..c6ec537e9 100644 --- a/joblib/_parallel_backends.py +++ b/joblib/_parallel_backends.py @@ -431,10 +431,22 @@ def effective_n_jobs(self, n_jobs): if mp.current_process().daemon: # Daemonic processes cannot have children if n_jobs != 1: - warnings.warn( - 'Multiprocessing-backed parallel loops cannot be nested,' - ' setting n_jobs=1', - stacklevel=3) + if inside_dask_worker(): + msg = ( + "Inside a Dask worker with daemon=True, " + "setting n_jobs=1.\nPossible work-arounds:\n" + "- dask.config.set(" + "{'distributed.worker.daemon': False})" + "- set the environment variable " + "DASK_DISTRIBUTED__WORKER__DAEMON=False\n" + "before creating your Dask cluster." + ) + else: + msg = ( + 'Multiprocessing-backed parallel loops ' + 'cannot be nested, setting n_jobs=1' + ) + warnings.warn(msg, stacklevel=3) return 1 if process_executor._CURRENT_DEPTH > 0: @@ -509,10 +521,23 @@ def effective_n_jobs(self, n_jobs): elif mp.current_process().daemon: # Daemonic processes cannot have children if n_jobs != 1: - warnings.warn( - 'Loky-backed parallel loops cannot be called in a' - ' multiprocessing, setting n_jobs=1', - stacklevel=3) + if inside_dask_worker(): + msg = ( + "Inside a Dask worker with daemon=True, " + "setting n_jobs=1.\nPossible work-arounds:\n" + "- dask.config.set(" + "{'distributed.worker.daemon': False})\n" + "- set the environment variable " + "DASK_DISTRIBUTED__WORKER__DAEMON=False\n" + "before creating your Dask cluster." + ) + else: + msg = ( + 'Loky-backed parallel loops cannot be called in a' + ' multiprocessing, setting n_jobs=1' + ) + warnings.warn(msg, stacklevel=3) + return 1 elif not (self.in_main_thread() or self.nesting_level == 0): # Prevent posix fork inside in non-main posix threads @@ -608,3 +633,21 @@ class FallbackToBackend(Exception): def __init__(self, backend): self.backend = backend + + +def inside_dask_worker(): + """Check whether the current function is executed inside a Dask worker. + """ + # This function can not be in joblib._dask because there would be a + # circular import: + # _dask imports _parallel_backend that imports _dask ... + try: + from distributed import get_worker + except ImportError: + return False + + try: + get_worker() + return True + except ValueError: + return False diff --git a/joblib/_store_backends.py b/joblib/_store_backends.py index d4389ed86..e96f30610 100644 --- a/joblib/_store_backends.py +++ b/joblib/_store_backends.py @@ -130,7 +130,7 @@ def configure(self, location, verbose=0, backend_options=dict()): verbose: int The level of verbosity of the store backend_options: dict - Contains a dictionnary of named paremeters used to configure the + Contains a dictionary of named parameters used to configure the store backend. 
""" diff --git a/joblib/_utils.py b/joblib/_utils.py new file mode 100644 index 000000000..2dbd4f636 --- /dev/null +++ b/joblib/_utils.py @@ -0,0 +1,44 @@ +# Adapted from https://stackoverflow.com/a/9558001/2536294 + +import ast +import operator as op + +# supported operators +operators = { + ast.Add: op.add, + ast.Sub: op.sub, + ast.Mult: op.mul, + ast.Div: op.truediv, + ast.FloorDiv: op.floordiv, + ast.Mod: op.mod, + ast.Pow: op.pow, + ast.USub: op.neg, +} + + +def eval_expr(expr): + """ + >>> eval_expr('2*6') + 12 + >>> eval_expr('2**6') + 64 + >>> eval_expr('1 + 2*3**(4) / (6 + -7)') + -161.0 + """ + try: + return eval_(ast.parse(expr, mode="eval").body) + except (TypeError, SyntaxError, KeyError) as e: + raise ValueError( + f"{expr!r} is not a valid or supported arithmetic expression." + ) from e + + +def eval_(node): + if isinstance(node, ast.Num): # + return node.n + elif isinstance(node, ast.BinOp): # + return operators[type(node.op)](eval_(node.left), eval_(node.right)) + elif isinstance(node, ast.UnaryOp): # e.g., -1 + return operators[type(node.op)](eval_(node.operand)) + else: + raise TypeError(node) diff --git a/joblib/backports.py b/joblib/backports.py index cb2f7233d..c9936faae 100644 --- a/joblib/backports.py +++ b/joblib/backports.py @@ -2,12 +2,124 @@ Backports of fixes for joblib dependencies """ import os +import re import time -from distutils.version import LooseVersion from os.path import basename from multiprocessing import util +# Prior to joblib 1.2, joblib used to import LooseVersion from +# distutils.version. This import had a side-effect with setuptools that was +# implicitly required in sklearn.show_versions() to work without raising an +# exception for scikit-learn 1.0 and earlier. This has been fixed in +# scikit-learn 1.1 (not yet released at the time of writing), see: +# https://github.com/scikit-learn/scikit-learn/issues/22614 +# +# To avoid unnecessary disruption for users who might update to joblib 1.2 +# prior to a release of scikit-learn that includes the fix, let's keep on +# importing distutils here. TODO: Remove this for a future release of joblib, +# e.g. 6 months after the release of scikit-learn 1.1. +import distutils # noqa + + +class Version: + """Backport from deprecated distutils + + We maintain this backport to avoid introducing a new dependency on + `packaging`. + + We might rexplore this choice in the future if all major Python projects + introduce a dependency on packaging anyway. + """ + + def __init__(self, vstring=None): + if vstring: + self.parse(vstring) + + def __repr__(self): + return "%s ('%s')" % (self.__class__.__name__, str(self)) + + def __eq__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c == 0 + + def __lt__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c < 0 + + def __le__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c <= 0 + + def __gt__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c > 0 + + def __ge__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c >= 0 + + +class LooseVersion(Version): + """Backport from deprecated distutils + + We maintain this backport to avoid introducing a new dependency on + `packaging`. + + We might rexplore this choice in the future if all major Python projects + introduce a dependency on packaging anyway. 
+ """ + + component_re = re.compile(r'(\d+ | [a-z]+ | \.)', re.VERBOSE) + + def __init__(self, vstring=None): + if vstring: + self.parse(vstring) + + def parse(self, vstring): + # I've given up on thinking I can reconstruct the version string + # from the parsed tuple -- so I just store the string here for + # use by __str__ + self.vstring = vstring + components = [x for x in self.component_re.split(vstring) + if x and x != '.'] + for i, obj in enumerate(components): + try: + components[i] = int(obj) + except ValueError: + pass + + self.version = components + + def __str__(self): + return self.vstring + + def __repr__(self): + return "LooseVersion ('%s')" % str(self) + + def _cmp(self, other): + if isinstance(other, str): + other = LooseVersion(other) + elif not isinstance(other, LooseVersion): + return NotImplemented + + if self.version == other.version: + return 0 + if self.version < other.version: + return -1 + if self.version > other.version: + return 1 + try: import numpy as np diff --git a/joblib/compressor.py b/joblib/compressor.py index 0dbd3dc93..8361d37d4 100644 --- a/joblib/compressor.py +++ b/joblib/compressor.py @@ -2,7 +2,7 @@ import io import zlib -from distutils.version import LooseVersion +from joblib.backports import LooseVersion try: from threading import RLock @@ -89,7 +89,7 @@ class CompressorWrapper(): prefix: bytestring A bytestring corresponding to the magic number that identifies the file format associated to the compressor. - extention: str + extension: str The file extension used to automatically select this compressor during a dump to a file. """ diff --git a/joblib/disk.py b/joblib/disk.py index 3b2735d04..32fbb89f6 100644 --- a/joblib/disk.py +++ b/joblib/disk.py @@ -66,10 +66,10 @@ def mkdirp(d): # if a rmtree operation fails in rm_subdirs, wait for this much time (in secs), # then retry up to RM_SUBDIRS_N_RETRY times. If it still fails, raise the -# exception. this mecanism ensures that the sub-process gc have the time to +# exception. this mechanism ensures that the sub-process gc have the time to # collect and close the memmaps before we fail. RM_SUBDIRS_RETRY_TIME = 0.1 -RM_SUBDIRS_N_RETRY = 5 +RM_SUBDIRS_N_RETRY = 10 def rm_subdirs(path, onerror=None): @@ -119,7 +119,7 @@ def delete_folder(folder_path, onerror=None, allow_non_empty=True): folder_path, ignore_errors=False, onerror=None ) util.debug( - "Sucessfully deleted {}".format(folder_path)) + "Successfully deleted {}".format(folder_path)) break else: raise OSError( diff --git a/joblib/externals/cloudpickle/__init__.py b/joblib/externals/cloudpickle/__init__.py index f461d65e9..c802221ef 100644 --- a/joblib/externals/cloudpickle/__init__.py +++ b/joblib/externals/cloudpickle/__init__.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import - - from .cloudpickle import * # noqa from .cloudpickle_fast import CloudPickler, dumps, dump # noqa @@ -8,4 +5,4 @@ # expose their Pickler subclass at top-level under the "Pickler" name. Pickler = CloudPickler -__version__ = '1.6.0' +__version__ = '2.2.0' diff --git a/joblib/externals/cloudpickle/cloudpickle.py b/joblib/externals/cloudpickle/cloudpickle.py index 05d52afa0..317be6915 100644 --- a/joblib/externals/cloudpickle/cloudpickle.py +++ b/joblib/externals/cloudpickle/cloudpickle.py @@ -40,7 +40,6 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
""" -from __future__ import print_function import builtins import dis @@ -55,7 +54,8 @@ import warnings from .compat import pickle -from typing import Generic, Union, Tuple, Callable +from collections import OrderedDict +from typing import ClassVar, Generic, Union, Tuple, Callable from pickle import _getattribute from importlib._bootstrap import _find_spec @@ -65,11 +65,6 @@ except ImportError: _typing_extensions = Literal = Final = None -if sys.version_info >= (3, 5, 3): - from typing import ClassVar -else: # pragma: no cover - ClassVar = None - if sys.version_info >= (3, 8): from types import CellType else: @@ -87,8 +82,11 @@ def g(): # communication speed over compatibility: DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL +# Names of modules whose resources should be treated as dynamic. +_PICKLE_BY_VALUE_MODULES = set() + # Track the provenance of reconstructed dynamic classes to make it possible to -# recontruct instances from the matching singleton class definition when +# reconstruct instances from the matching singleton class definition when # appropriate and preserve the usual "isinstance" semantics of Python objects. _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() @@ -123,6 +121,77 @@ def _lookup_class_or_track(class_tracker_id, class_def): return class_def +def register_pickle_by_value(module): + """Register a module to make it functions and classes picklable by value. + + By default, functions and classes that are attributes of an importable + module are to be pickled by reference, that is relying on re-importing + the attribute from the module at load time. + + If `register_pickle_by_value(module)` is called, all its functions and + classes are subsequently to be pickled by value, meaning that they can + be loaded in Python processes where the module is not importable. + + This is especially useful when developing a module in a distributed + execution environment: restarting the client Python process with the new + source code is enough: there is no need to re-install the new version + of the module on all the worker nodes nor to restart the workers. + + Note: this feature is considered experimental. See the cloudpickle + README.md file for more details and limitations. + """ + if not isinstance(module, types.ModuleType): + raise ValueError( + f"Input should be a module object, got {str(module)} instead" + ) + # In the future, cloudpickle may need a way to access any module registered + # for pickling by value in order to introspect relative imports inside + # functions pickled by value. (see + # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633). + # This access can be ensured by checking that module is present in + # sys.modules at registering time and assuming that it will still be in + # there when accessed during pickling. Another alternative would be to + # store a weakref to the module. Even though cloudpickle does not implement + # this introspection yet, in order to avoid a possible breaking change + # later, we still enforce the presence of module inside sys.modules. + if module.__name__ not in sys.modules: + raise ValueError( + f"{module} was not imported correctly, have you used an " + f"`import` statement to access it?" 
+ ) + _PICKLE_BY_VALUE_MODULES.add(module.__name__) + + +def unregister_pickle_by_value(module): + """Unregister that the input module should be pickled by value.""" + if not isinstance(module, types.ModuleType): + raise ValueError( + f"Input should be a module object, got {str(module)} instead" + ) + if module.__name__ not in _PICKLE_BY_VALUE_MODULES: + raise ValueError(f"{module} is not registered for pickle by value") + else: + _PICKLE_BY_VALUE_MODULES.remove(module.__name__) + + +def list_registry_pickle_by_value(): + return _PICKLE_BY_VALUE_MODULES.copy() + + +def _is_registered_pickle_by_value(module): + module_name = module.__name__ + if module_name in _PICKLE_BY_VALUE_MODULES: + return True + while True: + parent_name = module_name.rsplit(".", 1)[0] + if parent_name == module_name: + break + if parent_name in _PICKLE_BY_VALUE_MODULES: + return True + module_name = parent_name + return False + + def _whichmodule(obj, name): """Find the module an object belongs to. @@ -136,11 +205,14 @@ def _whichmodule(obj, name): # Workaround bug in old Python versions: prior to Python 3.7, # T.__module__ would always be set to "typing" even when the TypeVar T # would be defined in a different module. - # - # For such older Python versions, we ignore the __module__ attribute of - # TypeVar instances and instead exhaustively lookup those instances in - # all currently imported modules. - module_name = None + if name is not None and getattr(typing, name, None) is obj: + # Built-in TypeVar defined in typing such as AnyStr + return 'typing' + else: + # User defined or third-party TypeVar: __module__ attribute is + # irrelevant, thus trigger a exhaustive search for obj in all + # modules. + module_name = None else: module_name = getattr(obj, '__module__', None) @@ -166,18 +238,35 @@ def _whichmodule(obj, name): return None -def _is_importable(obj, name=None): - """Dispatcher utility to test the importability of various constructs.""" - if isinstance(obj, types.FunctionType): - return _lookup_module_and_qualname(obj, name=name) is not None - elif issubclass(type(obj), type): - return _lookup_module_and_qualname(obj, name=name) is not None +def _should_pickle_by_reference(obj, name=None): + """Test whether an function or a class should be pickled by reference + + Pickling by reference means by that the object (typically a function or a + class) is an attribute of a module that is assumed to be importable in the + target Python environment. Loading will therefore rely on importing the + module and then calling `getattr` on it to access the function or class. + + Pickling by reference is the only option to pickle functions and classes + in the standard library. In cloudpickle the alternative option is to + pickle by value (for instance for interactively or locally defined + functions and classes or for attributes of modules that have been + explicitly registered to be pickled by value. + """ + if isinstance(obj, types.FunctionType) or issubclass(type(obj), type): + module_and_name = _lookup_module_and_qualname(obj, name=name) + if module_and_name is None: + return False + module, name = module_and_name + return not _is_registered_pickle_by_value(module) + elif isinstance(obj, types.ModuleType): # We assume that sys.modules is primarily used as a cache mechanism for # the Python import machinery. Checking if a module has been added in - # is sys.modules therefore a cheap and simple heuristic to tell us whether - # we can assume that a given module could be imported by name in - # another Python process. 
+ # is sys.modules therefore a cheap and simple heuristic to tell us + # whether we can assume that a given module could be imported by name + # in another Python process. + if _is_registered_pickle_by_value(obj): + return False return obj.__name__ in sys.modules else: raise TypeError( @@ -232,11 +321,13 @@ def _extract_code_globals(co): """ out_names = _extract_code_globals_cache.get(co) if out_names is None: - names = co.co_names - out_names = {names[oparg] for _, oparg in _walk_global_ops(co)} + # We use a dict with None values instead of a set to get a + # deterministic order (assuming Python 3.6+) and avoid introducing + # non-deterministic pickle bytes as a results. + out_names = {name: None for name in _walk_global_ops(co)} # Declaring a function inside another one using the "def ..." - # syntax generates a constant code object corresonding to the one + # syntax generates a constant code object corresponding to the one # of the nested function's As the nested function may itself need # global variables, we need to introspect its code, extract its # globals, (look for code object in it's co_consts attribute..) and @@ -244,7 +335,7 @@ def _extract_code_globals(co): if co.co_consts: for const in co.co_consts: if isinstance(const, types.CodeType): - out_names |= _extract_code_globals(const) + out_names.update(_extract_code_globals(const)) _extract_code_globals_cache[co] = out_names @@ -419,13 +510,12 @@ def _builtin_type(name): def _walk_global_ops(code): """ - Yield (opcode, argument number) tuples for all - global-referencing instructions in *code*. + Yield referenced name for all global-referencing instructions in *code*. """ for instr in dis.get_instructions(code): op = instr.opcode if op in GLOBAL_OPS: - yield op, instr.arg + yield instr.argval def _extract_class_dict(cls): @@ -452,15 +542,31 @@ def _extract_class_dict(cls): if sys.version_info[:2] < (3, 7): # pragma: no branch def _is_parametrized_type_hint(obj): - # This is very cheap but might generate false positives. + # This is very cheap but might generate false positives. So try to + # narrow it down is good as possible. 
+ type_module = getattr(type(obj), '__module__', None) + from_typing_extensions = type_module == 'typing_extensions' + from_typing = type_module == 'typing' + # general typing Constructs is_typing = getattr(obj, '__origin__', None) is not None # typing_extensions.Literal - is_litteral = getattr(obj, '__values__', None) is not None + is_literal = ( + (getattr(obj, '__values__', None) is not None) + and from_typing_extensions + ) # typing_extensions.Final - is_final = getattr(obj, '__type__', None) is not None + is_final = ( + (getattr(obj, '__type__', None) is not None) + and from_typing_extensions + ) + + # typing.ClassVar + is_classvar = ( + (getattr(obj, '__type__', None) is not None) and from_typing + ) # typing.Union/Tuple for old Python 3.5 is_union = getattr(obj, '__union_params__', None) is not None @@ -469,8 +575,8 @@ def _is_parametrized_type_hint(obj): getattr(obj, '__result__', None) is not None and getattr(obj, '__args__', None) is not None ) - return any((is_typing, is_litteral, is_final, is_union, is_tuple, - is_callable)) + return any((is_typing, is_literal, is_final, is_classvar, is_union, + is_tuple, is_callable)) def _create_parametrized_type_hint(origin, args): return origin[args] @@ -490,43 +596,21 @@ def parametrized_type_hint_getinitargs(obj): elif type(obj) is type(ClassVar): initargs = (ClassVar, obj.__type__) elif type(obj) is type(Generic): - parameters = obj.__parameters__ - if len(obj.__parameters__) > 0: - # in early Python 3.5, __parameters__ was sometimes - # preferred to __args__ - initargs = (obj.__origin__, parameters) - - else: - initargs = (obj.__origin__, obj.__args__) + initargs = (obj.__origin__, obj.__args__) elif type(obj) is type(Union): - if sys.version_info < (3, 5, 3): # pragma: no cover - initargs = (Union, obj.__union_params__) - else: - initargs = (Union, obj.__args__) + initargs = (Union, obj.__args__) elif type(obj) is type(Tuple): - if sys.version_info < (3, 5, 3): # pragma: no cover - initargs = (Tuple, obj.__tuple_params__) - else: - initargs = (Tuple, obj.__args__) + initargs = (Tuple, obj.__args__) elif type(obj) is type(Callable): - if sys.version_info < (3, 5, 3): # pragma: no cover - args = obj.__args__ - result = obj.__result__ - if args != Ellipsis: - if isinstance(args, tuple): - args = list(args) - else: - args = [args] + (*args, result) = obj.__args__ + if len(args) == 1 and args[0] is Ellipsis: + args = Ellipsis else: - (*args, result) = obj.__args__ - if len(args) == 1 and args[0] is Ellipsis: - args = Ellipsis - else: - args = list(args) + args = list(args) initargs = (Callable, (args, result)) else: # pragma: no cover raise pickle.PicklingError( - "Cloudpickle Error: Unknown type {}".format(type(obj)) + f"Cloudpickle Error: Unknown type {type(obj)}" ) return initargs @@ -557,8 +641,11 @@ def _rebuild_tornado_coroutine(func): loads = pickle.loads -# hack for __import__ not working as desired def subimport(name): + # We cannot do simply: `return __import__(name)`: Indeed, if ``name`` is + # the name of a submodule, __import__ will return the top-level root module + # of this submodule. For instance, __import__('os.path') returns the `os` + # module. 
__import__(name) return sys.modules[name] @@ -603,7 +690,7 @@ def instance(cls): @instance -class _empty_cell_value(object): +class _empty_cell_value: """sentinel for empty closures """ @classmethod @@ -632,7 +719,7 @@ def _fill_function(*args): keys = ['globals', 'defaults', 'dict', 'module', 'closure_values'] state = dict(zip(keys, args[1:])) else: - raise ValueError('Unexpected _fill_value arguments: %r' % (args,)) + raise ValueError(f'Unexpected _fill_value arguments: {args!r}') # - At pickling time, any dynamic global variable used by func is # serialized by value (in state['globals']). @@ -676,6 +763,12 @@ def _fill_function(*args): return func +def _make_function(code, globals, name, argdefs, closure): + # Setting __builtins__ in globals is needed for nogil CPython. + globals["__builtins__"] = __builtins__ + return types.FunctionType(code, globals, name, argdefs, closure) + + def _make_empty_cell(): if False: # trick the compiler into creating an empty cell in our lambda @@ -699,7 +792,7 @@ def _make_skel_func(code, cell_count, base_globals=None): """ # This function is deprecated and should be removed in cloudpickle 1.7 warnings.warn( - "A pickle file created using an old (<=1.4.1) version of cloudpicke " + "A pickle file created using an old (<=1.4.1) version of cloudpickle " "is currently being loaded. This is not supported by cloudpickle and " "will break in cloudpickle 1.7", category=UserWarning ) @@ -800,29 +893,33 @@ def _make_typevar(name, bound, constraints, covariant, contravariant, def _decompose_typevar(obj): - try: - class_tracker_id = _get_or_create_tracker_id(obj) - except TypeError: # pragma: nocover - # TypeVar instances are not weakref-able in Python 3.5.3 - class_tracker_id = None return ( obj.__name__, obj.__bound__, obj.__constraints__, obj.__covariant__, obj.__contravariant__, - class_tracker_id, + _get_or_create_tracker_id(obj), ) def _typevar_reduce(obj): - # TypeVar instances have no __qualname__ hence we pass the name explicitly. + # TypeVar instances require the module information hence why we + # are not using the _should_pickle_by_reference directly module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) + if module_and_name is None: return (_make_typevar, _decompose_typevar(obj)) + elif _is_registered_pickle_by_value(module_and_name[0]): + return (_make_typevar, _decompose_typevar(obj)) + return (getattr, module_and_name) def _get_bases(typ): - if hasattr(typ, '__orig_bases__'): + if '__orig_bases__' in getattr(typ, '__dict__', {}): # For generic types (see PEP 560) + # Note that simply checking `hasattr(typ, '__orig_bases__')` is not + # correct. Subclasses of a fully-parameterized generic class does not + # have `__orig_bases__` defined, but `hasattr(typ, '__orig_bases__')` + # will return True because it's defined in the base class. 
bases_attr = '__orig_bases__' else: # For regular class objects @@ -830,13 +927,22 @@ def _get_bases(typ): return getattr(typ, bases_attr) -def _make_dict_keys(obj): - return dict.fromkeys(obj).keys() +def _make_dict_keys(obj, is_ordered=False): + if is_ordered: + return OrderedDict.fromkeys(obj).keys() + else: + return dict.fromkeys(obj).keys() -def _make_dict_values(obj): - return {i: _ for i, _ in enumerate(obj)}.values() +def _make_dict_values(obj, is_ordered=False): + if is_ordered: + return OrderedDict((i, _) for i, _ in enumerate(obj)).values() + else: + return {i: _ for i, _ in enumerate(obj)}.values() -def _make_dict_items(obj): - return obj.items() +def _make_dict_items(obj, is_ordered=False): + if is_ordered: + return OrderedDict(obj).items() + else: + return obj.items() diff --git a/joblib/externals/cloudpickle/cloudpickle_fast.py b/joblib/externals/cloudpickle/cloudpickle_fast.py index fa8da0f63..8741dcbda 100644 --- a/joblib/externals/cloudpickle/cloudpickle_fast.py +++ b/joblib/externals/cloudpickle/cloudpickle_fast.py @@ -6,7 +6,7 @@ is only available for Python versions 3.8+, a lot of backward-compatibility code is also removed. -Note that the C Pickler sublassing API is CPython-specific. Therefore, some +Note that the C Pickler subclassing API is CPython-specific. Therefore, some guards present in cloudpickle.py that were written to handle PyPy specificities are not present in cloudpickle_fast.py """ @@ -23,23 +23,23 @@ import typing from enum import Enum -from collections import ChainMap +from collections import ChainMap, OrderedDict from .compat import pickle, Pickler from .cloudpickle import ( _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, - _find_imported_submodules, _get_cell_contents, _is_importable, + _find_imported_submodules, _get_cell_contents, _should_pickle_by_reference, _builtin_type, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport, _typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType, _is_parametrized_type_hint, PYPY, cell_set, parametrized_type_hint_getinitargs, _create_parametrized_type_hint, builtin_code_type, - _make_dict_keys, _make_dict_values, _make_dict_items, + _make_dict_keys, _make_dict_values, _make_dict_items, _make_function, ) -if pickle.HIGHEST_PROTOCOL >= 5 and not PYPY: +if pickle.HIGHEST_PROTOCOL >= 5: # Shorthands similar to pickle.dump/pickle.dumps def dump(obj, file, protocol=None, buffer_callback=None): @@ -123,7 +123,7 @@ def _class_getnewargs(obj): def _enum_getnewargs(obj): - members = dict((e.name, e.value) for e in obj) + members = {e.name: e.value for e in obj} return (obj.__bases__, obj.__name__, obj.__qualname__, members, obj.__module__, _get_or_create_tracker_id(obj), None) @@ -180,7 +180,7 @@ def _class_getstate(obj): clsdict.pop('__weakref__', None) if issubclass(type(obj), abc.ABCMeta): - # If obj is an instance of an ABCMeta subclass, dont pickle the + # If obj is an instance of an ABCMeta subclass, don't pickle the # cache/negative caches populated during isinstance/issubclass # checks, but pickle the list of registered subclasses of obj. clsdict.pop('_abc_cache', None) @@ -218,7 +218,7 @@ def _class_getstate(obj): def _enum_getstate(obj): clsdict, slotstate = _class_getstate(obj) - members = dict((e.name, e.value) for e in obj) + members = {e.name: e.value for e in obj} # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class: # Those attributes are already handled by the metaclass. 
for attrname in ["_generate_next_value_", "_member_names_", @@ -244,7 +244,46 @@ def _enum_getstate(obj): def _code_reduce(obj): """codeobject reducer""" - if hasattr(obj, "co_posonlyargcount"): # pragma: no branch + # If you are not sure about the order of arguments, take a look at help + # of the specific type from types, for example: + # >>> from types import CodeType + # >>> help(CodeType) + if hasattr(obj, "co_exceptiontable"): # pragma: no branch + # Python 3.11 and later: there are some new attributes + # related to the enhanced exceptions. + args = ( + obj.co_argcount, obj.co_posonlyargcount, + obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, + obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, + obj.co_varnames, obj.co_filename, obj.co_name, obj.co_qualname, + obj.co_firstlineno, obj.co_linetable, obj.co_exceptiontable, + obj.co_freevars, obj.co_cellvars, + ) + elif hasattr(obj, "co_linetable"): # pragma: no branch + # Python 3.10 and later: obj.co_lnotab is deprecated and constructor + # expects obj.co_linetable instead. + args = ( + obj.co_argcount, obj.co_posonlyargcount, + obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, + obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, + obj.co_varnames, obj.co_filename, obj.co_name, + obj.co_firstlineno, obj.co_linetable, obj.co_freevars, + obj.co_cellvars + ) + elif hasattr(obj, "co_nmeta"): # pragma: no cover + # "nogil" Python: modified attributes from 3.9 + args = ( + obj.co_argcount, obj.co_posonlyargcount, + obj.co_kwonlyargcount, obj.co_nlocals, obj.co_framesize, + obj.co_ndefaultargs, obj.co_nmeta, + obj.co_flags, obj.co_code, obj.co_consts, + obj.co_varnames, obj.co_filename, obj.co_name, + obj.co_firstlineno, obj.co_lnotab, obj.co_exc_handlers, + obj.co_jump_table, obj.co_freevars, obj.co_cellvars, + obj.co_free2reg, obj.co_cell2reg + ) + elif hasattr(obj, "co_posonlyargcount"): + # Backward compat for 3.9 and older args = ( obj.co_argcount, obj.co_posonlyargcount, obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, @@ -254,6 +293,7 @@ def _code_reduce(obj): obj.co_cellvars ) else: + # Backward compat for even older versions of Python args = ( obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, @@ -339,11 +379,16 @@ def _memoryview_reduce(obj): def _module_reduce(obj): - if _is_importable(obj): + if _should_pickle_by_reference(obj): return subimport, (obj.__name__,) else: - obj.__dict__.pop('__builtins__', None) - return dynamic_subimport, (obj.__name__, vars(obj)) + # Some external libraries can populate the "__builtins__" entry of a + # module's `__dict__` with unpicklable objects (see #316). For that + # reason, we do not attempt to pickle the "__builtins__" entry, and + # restore a default value for it at unpickling time. 
+ state = obj.__dict__.copy() + state.pop('__builtins__', None) + return dynamic_subimport, (obj.__name__, state) def _method_reduce(obj): @@ -396,7 +441,7 @@ def _class_reduce(obj): return type, (NotImplemented,) elif obj in _BUILTIN_TYPE_NAMES: return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) - elif not _is_importable(obj): + elif not _should_pickle_by_reference(obj): return _dynamic_class_reduce(obj) return NotImplemented @@ -419,6 +464,24 @@ def _dict_items_reduce(obj): return _make_dict_items, (dict(obj), ) +def _odict_keys_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_keys, (list(obj), True) + + +def _odict_values_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_values, (list(obj), True) + + +def _odict_items_reduce(obj): + return _make_dict_items, (dict(obj), True) + + # COLLECTIONS OF OBJECTS STATE SETTERS # ------------------------------------ # state setters are called at unpickling time, once the object is created and @@ -426,7 +489,7 @@ def _dict_items_reduce(obj): def _function_setstate(obj, state): - """Update the state of a dynaamic function. + """Update the state of a dynamic function. As __closure__ and __globals__ are readonly attributes of a function, we cannot rely on the native setstate routine of pickle.load_build, that calls @@ -495,7 +558,13 @@ class CloudPickler(Pickler): _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce - + _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce + _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce + _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce + _dispatch_table[abc.abstractmethod] = _classmethod_reduce + _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce + _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce + _dispatch_table[abc.abstractproperty] = _property_reduce dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) @@ -505,7 +574,7 @@ def _dynamic_function_reduce(self, func): """Reduce a function that is not pickleable via attribute lookup.""" newargs = self._function_getnewargs(func) state = _function_getstate(func) - return (types.FunctionType, newargs, state, None, None, + return (_make_function, newargs, state, None, None, _function_setstate) def _function_reduce(self, obj): @@ -520,7 +589,7 @@ def _function_reduce(self, obj): As opposed to cloudpickle.py, There no special handling for builtin pypy functions because cloudpickle_fast is CPython-specific. """ - if _is_importable(obj): + if _should_pickle_by_reference(obj): return NotImplemented else: return self._dynamic_function_reduce(obj) @@ -572,6 +641,32 @@ def dump(self, obj): raise if pickle.HIGHEST_PROTOCOL >= 5: + def __init__(self, file, protocol=None, buffer_callback=None): + if protocol is None: + protocol = DEFAULT_PROTOCOL + Pickler.__init__( + self, file, protocol=protocol, buffer_callback=buffer_callback + ) + # map functions __globals__ attribute ids, to ensure that functions + # sharing the same global namespace at pickling time also share + # their global namespace at unpickling time. 
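A small sketch of what the ``globals_ref`` bookkeeping described in the comment above buys, assuming the snippet is run as a script (so both functions live in ``__main__`` and are pickled by value) and the vendored copy is importable as ``joblib.externals.cloudpickle``.

    import pickle

    from joblib.externals import cloudpickle

    n = 0

    def bump():
        global n
        n += 1

    def read():
        return n

    # Pickling both functions in a single dumps() call lets the pickler map
    # their common __globals__ to one shared dict on the unpickling side too.
    bump2, read2 = pickle.loads(cloudpickle.dumps((bump, read)))
    bump2()
    print(read2())   # 1: the two rebuilt functions share one namespace
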
+ self.globals_ref = {} + self.proto = int(protocol) + else: + def __init__(self, file, protocol=None): + if protocol is None: + protocol = DEFAULT_PROTOCOL + Pickler.__init__(self, file, protocol=protocol) + # map functions __globals__ attribute ids, to ensure that functions + # sharing the same global namespace at pickling time also share + # their global namespace at unpickling time. + self.globals_ref = {} + assert hasattr(self, 'proto') + + if pickle.HIGHEST_PROTOCOL >= 5 and not PYPY: + # Pickler is the C implementation of the CPython pickler and therefore + # we rely on reduce_override method to customize the pickler behavior. + # `CloudPickler.dispatch` is only left for backward compatibility - note # that when using protocol 5, `CloudPickler.dispatch` is not an # extension of `Pickler.dispatch` dictionary, because CloudPickler @@ -579,7 +674,7 @@ def dump(self, obj): # `dispatch` attribute. Earlier versions of the protocol 5 CloudPickler # used `CloudPickler.dispatch` as a class-level attribute storing all # reducers implemented by cloudpickle, but the attribute name was not a - # great choice given the meaning of `Cloudpickler.dispatch` when + # great choice given the meaning of `CloudPickler.dispatch` when # `CloudPickler` extends the pure-python pickler. dispatch = dispatch_table @@ -592,17 +687,6 @@ def dump(self, obj): # availability of both notions coincide on CPython's pickle and the # pickle5 backport, but it may not be the case anymore when pypy # implements protocol 5 - def __init__(self, file, protocol=None, buffer_callback=None): - if protocol is None: - protocol = DEFAULT_PROTOCOL - Pickler.__init__( - self, file, protocol=protocol, buffer_callback=buffer_callback - ) - # map functions __globals__ attribute ids, to ensure that functions - # sharing the same global namespace at pickling time also share - # their global namespace at unpickling time. - self.globals_ref = {} - self.proto = int(protocol) def reducer_override(self, obj): """Type-agnostic reducing callback for function and classes. @@ -653,7 +737,7 @@ def reducer_override(self, obj): return self._function_reduce(obj) else: # fallback to save_global, including the Pickler's - # distpatch_table + # dispatch_table return NotImplemented else: @@ -663,16 +747,6 @@ def reducer_override(self, obj): # hard-coded call to save_global when pickling meta-classes. dispatch = Pickler.dispatch.copy() - def __init__(self, file, protocol=None): - if protocol is None: - protocol = DEFAULT_PROTOCOL - Pickler.__init__(self, file, protocol=protocol) - # map functions __globals__ attribute ids, to ensure that functions - # sharing the same global namespace at pickling time also share - # their global namespace at unpickling time. - self.globals_ref = {} - assert hasattr(self, 'proto') - def _save_reduce_pickle5(self, func, args, state=None, listitems=None, dictitems=None, state_setter=None, obj=None): save = self.save @@ -724,7 +798,7 @@ def save_global(self, obj, name=None, pack=struct.pack): ) elif name is not None: Pickler.save_global(self, obj, name=name) - elif not _is_importable(obj, name=name): + elif not _should_pickle_by_reference(obj, name=name): self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj) else: Pickler.save_global(self, obj, name=name) @@ -736,7 +810,7 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. 
""" - if _is_importable(obj, name=name): + if _should_pickle_by_reference(obj, name=name): return Pickler.save_global(self, obj, name=name) elif PYPY and isinstance(obj.__code__, builtin_code_type): return self.save_pypy_builtin_func(obj) diff --git a/joblib/externals/cloudpickle/compat.py b/joblib/externals/cloudpickle/compat.py index afa285f62..5e9b52773 100644 --- a/joblib/externals/cloudpickle/compat.py +++ b/joblib/externals/cloudpickle/compat.py @@ -7,7 +7,12 @@ from pickle5 import Pickler # noqa: F401 except ImportError: import pickle # noqa: F401 + + # Use the Python pickler for old CPython versions from pickle import _Pickler as Pickler # noqa: F401 else: import pickle # noqa: F401 - from _pickle import Pickler # noqa: F401 + + # Pickler will the C implementation in CPython and the Python + # implementation in PyPy + from pickle import Pickler # noqa: F401 diff --git a/joblib/externals/loky/__init__.py b/joblib/externals/loky/__init__.py index 21f3bb6b9..fd2008d78 100644 --- a/joblib/externals/loky/__init__.py +++ b/joblib/externals/loky/__init__.py @@ -3,11 +3,18 @@ :class:`ProcessPoolExecutor` and a function :func:`get_reusable_executor` which hide the pool management under the hood. """ -from ._base import Executor, Future -from ._base import wait, as_completed -from ._base import TimeoutError, CancelledError -from ._base import ALL_COMPLETED, FIRST_COMPLETED, FIRST_EXCEPTION +from concurrent.futures import ( + ALL_COMPLETED, + FIRST_COMPLETED, + FIRST_EXCEPTION, + CancelledError, + Executor, + TimeoutError, + as_completed, + wait, +) +from ._base import Future from .backend.context import cpu_count from .backend.reduction import set_loky_pickler from .reusable_executor import get_reusable_executor @@ -22,4 +29,4 @@ "wrap_non_picklable_objects", "set_loky_pickler"] -__version__ = '2.9.0' +__version__ = '3.3.0' diff --git a/joblib/externals/loky/_base.py b/joblib/externals/loky/_base.py index 92422bbf3..cd8f34100 100644 --- a/joblib/externals/loky/_base.py +++ b/joblib/externals/loky/_base.py @@ -1,5 +1,5 @@ ############################################################################### -# Backport concurrent.futures for python2.7/3.3 +# Modification of concurrent.futures.Future # # author: Thomas Moreau and Olivier Grisel # @@ -10,618 +10,19 @@ # Copyright 2009 Brian Quinlan. All Rights Reserved. # Licensed to PSF under a Contributor Agreement. -import sys -import time -import logging -import threading -import collections - - -if sys.version_info[:2] >= (3, 3): - - from concurrent.futures import wait, as_completed - from concurrent.futures import TimeoutError, CancelledError - from concurrent.futures import Executor, Future as _BaseFuture - - from concurrent.futures import FIRST_EXCEPTION - from concurrent.futures import ALL_COMPLETED, FIRST_COMPLETED - - from concurrent.futures._base import LOGGER - from concurrent.futures._base import PENDING, RUNNING, CANCELLED - from concurrent.futures._base import CANCELLED_AND_NOTIFIED, FINISHED -else: - - FIRST_COMPLETED = 'FIRST_COMPLETED' - FIRST_EXCEPTION = 'FIRST_EXCEPTION' - ALL_COMPLETED = 'ALL_COMPLETED' - _AS_COMPLETED = '_AS_COMPLETED' - - # Possible future states (for internal use by the futures package). - PENDING = 'PENDING' - RUNNING = 'RUNNING' - # The future was cancelled by the user... - CANCELLED = 'CANCELLED' - # ...and _Waiter.add_cancelled() was called by a worker. 
- CANCELLED_AND_NOTIFIED = 'CANCELLED_AND_NOTIFIED' - FINISHED = 'FINISHED' - - _FUTURE_STATES = [ - PENDING, - RUNNING, - CANCELLED, - CANCELLED_AND_NOTIFIED, - FINISHED - ] - - _STATE_TO_DESCRIPTION_MAP = { - PENDING: "pending", - RUNNING: "running", - CANCELLED: "cancelled", - CANCELLED_AND_NOTIFIED: "cancelled", - FINISHED: "finished" - } - - # Logger for internal use by the futures package. - LOGGER = logging.getLogger("concurrent.futures") - - class Error(Exception): - """Base class for all future-related exceptions.""" - pass - - class CancelledError(Error): - """The Future was cancelled.""" - pass - - class TimeoutError(Error): - """The operation exceeded the given deadline.""" - pass - - class _Waiter(object): - """Provides the event that wait() and as_completed() block on.""" - def __init__(self): - self.event = threading.Event() - self.finished_futures = [] - - def add_result(self, future): - self.finished_futures.append(future) - - def add_exception(self, future): - self.finished_futures.append(future) - - def add_cancelled(self, future): - self.finished_futures.append(future) - - class _AsCompletedWaiter(_Waiter): - """Used by as_completed().""" - - def __init__(self): - super(_AsCompletedWaiter, self).__init__() - self.lock = threading.Lock() - - def add_result(self, future): - with self.lock: - super(_AsCompletedWaiter, self).add_result(future) - self.event.set() - - def add_exception(self, future): - with self.lock: - super(_AsCompletedWaiter, self).add_exception(future) - self.event.set() - - def add_cancelled(self, future): - with self.lock: - super(_AsCompletedWaiter, self).add_cancelled(future) - self.event.set() - - class _FirstCompletedWaiter(_Waiter): - """Used by wait(return_when=FIRST_COMPLETED).""" - - def add_result(self, future): - super(_FirstCompletedWaiter, self).add_result(future) - self.event.set() - - def add_exception(self, future): - super(_FirstCompletedWaiter, self).add_exception(future) - self.event.set() - - def add_cancelled(self, future): - super(_FirstCompletedWaiter, self).add_cancelled(future) - self.event.set() - - class _AllCompletedWaiter(_Waiter): - """Used by wait(return_when=FIRST_EXCEPTION and ALL_COMPLETED).""" - - def __init__(self, num_pending_calls, stop_on_exception): - self.num_pending_calls = num_pending_calls - self.stop_on_exception = stop_on_exception - self.lock = threading.Lock() - super(_AllCompletedWaiter, self).__init__() - - def _decrement_pending_calls(self): - with self.lock: - self.num_pending_calls -= 1 - if not self.num_pending_calls: - self.event.set() - - def add_result(self, future): - super(_AllCompletedWaiter, self).add_result(future) - self._decrement_pending_calls() - - def add_exception(self, future): - super(_AllCompletedWaiter, self).add_exception(future) - if self.stop_on_exception: - self.event.set() - else: - self._decrement_pending_calls() - - def add_cancelled(self, future): - super(_AllCompletedWaiter, self).add_cancelled(future) - self._decrement_pending_calls() - - class _AcquireFutures(object): - """A context manager that does an ordered acquire of Future conditions. 
- """ - - def __init__(self, futures): - self.futures = sorted(futures, key=id) - - def __enter__(self): - for future in self.futures: - future._condition.acquire() - - def __exit__(self, *args): - for future in self.futures: - future._condition.release() - - def _create_and_install_waiters(fs, return_when): - if return_when == _AS_COMPLETED: - waiter = _AsCompletedWaiter() - elif return_when == FIRST_COMPLETED: - waiter = _FirstCompletedWaiter() - else: - pending_count = sum( - f._state not in [CANCELLED_AND_NOTIFIED, FINISHED] - for f in fs) - - if return_when == FIRST_EXCEPTION: - waiter = _AllCompletedWaiter(pending_count, - stop_on_exception=True) - elif return_when == ALL_COMPLETED: - waiter = _AllCompletedWaiter(pending_count, - stop_on_exception=False) - else: - raise ValueError("Invalid return condition: %r" % return_when) - - for f in fs: - f._waiters.append(waiter) - - return waiter - - def as_completed(fs, timeout=None): - """An iterator over the given futures that yields each as it completes. - - Args: - fs: The sequence of Futures (possibly created by different - Executors) to iterate over. - timeout: The maximum number of seconds to wait. If None, then there - is no limit on the wait time. - - Returns: - An iterator that yields the given Futures as they complete - (finished or cancelled). If any given Futures are duplicated, they - will be returned once. - - Raises: - TimeoutError: If the entire result iterator could not be generated - before the given timeout. - """ - if timeout is not None: - end_time = timeout + time.time() - - fs = set(fs) - with _AcquireFutures(fs): - finished = set( - f for f in fs - if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) - pending = fs - finished - waiter = _create_and_install_waiters(fs, _AS_COMPLETED) - - try: - for future in finished: - yield future - - while pending: - if timeout is None: - wait_timeout = None - else: - wait_timeout = end_time - time.time() - if wait_timeout < 0: - raise TimeoutError('%d (of %d) futures unfinished' % ( - len(pending), len(fs))) - - waiter.event.wait(wait_timeout) - - with waiter.lock: - finished = waiter.finished_futures - waiter.finished_futures = [] - waiter.event.clear() - - for future in finished: - yield future - pending.remove(future) - - finally: - for f in fs: - with f._condition: - f._waiters.remove(waiter) - - DoneAndNotDoneFutures = collections.namedtuple( - 'DoneAndNotDoneFutures', 'done not_done') - - def wait(fs, timeout=None, return_when=ALL_COMPLETED): - """Wait for the futures in the given sequence to complete. - - Args: - fs: The sequence of Futures (possibly created by different - Executors) to wait upon. - timeout: The maximum number of seconds to wait. If None, then there - is no limit on the wait time. - return_when: Indicates when this function should return. The - options are: - - FIRST_COMPLETED - Return when any future finishes or is - cancelled. - FIRST_EXCEPTION - Return when any future finishes by raising an - exception. If no future raises an exception - then it is equivalent to ALL_COMPLETED. - ALL_COMPLETED - Return when all futures finish or are - cancelled. - - Returns: - A named 2-tuple of sets. The first set, named 'done', contains the - futures that completed (is finished or cancelled) before the wait - completed. The second set, named 'not_done', contains uncompleted - futures. 
- """ - with _AcquireFutures(fs): - done = set(f for f in fs - if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) - not_done = set(fs) - done - - if (return_when == FIRST_COMPLETED) and done: - return DoneAndNotDoneFutures(done, not_done) - elif (return_when == FIRST_EXCEPTION) and done: - if any(f for f in done - if not f.cancelled() and f.exception() is not None): - return DoneAndNotDoneFutures(done, not_done) - - if len(done) == len(fs): - return DoneAndNotDoneFutures(done, not_done) - - waiter = _create_and_install_waiters(fs, return_when) - - waiter.event.wait(timeout) - for f in fs: - with f._condition: - f._waiters.remove(waiter) - - done.update(waiter.finished_futures) - return DoneAndNotDoneFutures(done, set(fs) - done) - - class _BaseFuture(object): - """Represents the result of an asynchronous computation.""" - - def __init__(self): - """Initializes the future. Should not be called by clients.""" - self._condition = threading.Condition() - self._state = PENDING - self._result = None - self._exception = None - self._waiters = [] - self._done_callbacks = [] - - def __repr__(self): - with self._condition: - if self._state == FINISHED: - if self._exception: - return '<%s at %#x state=%s raised %s>' % ( - self.__class__.__name__, - id(self), - _STATE_TO_DESCRIPTION_MAP[self._state], - self._exception.__class__.__name__) - else: - return '<%s at %#x state=%s returned %s>' % ( - self.__class__.__name__, - id(self), - _STATE_TO_DESCRIPTION_MAP[self._state], - self._result.__class__.__name__) - return '<%s at %#x state=%s>' % ( - self.__class__.__name__, - id(self), - _STATE_TO_DESCRIPTION_MAP[self._state]) - - def cancel(self): - """Cancel the future if possible. - - Returns True if the future was cancelled, False otherwise. A future - cannot be cancelled if it is running or has already completed. - """ - with self._condition: - if self._state in [RUNNING, FINISHED]: - return False - - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - return True - - self._state = CANCELLED - self._condition.notify_all() - - self._invoke_callbacks() - return True - - def cancelled(self): - """Return True if the future was cancelled.""" - with self._condition: - return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED] - - def running(self): - """Return True if the future is currently executing.""" - with self._condition: - return self._state == RUNNING - - def done(self): - """Return True of the future was cancelled or finished executing. - """ - with self._condition: - return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, - FINISHED] - - def __get_result(self): - if self._exception: - raise self._exception - else: - return self._result - - def add_done_callback(self, fn): - """Attaches a callable that will be called when the future finishes. - - Args: - fn: A callable that will be called with this future as its only - argument when the future completes or is cancelled. The - callable will always be called by a thread in the same - process in which it was added. If the future has already - completed or been cancelled then the callable will be - called immediately. These callables are called in the order - that they were added. - """ - with self._condition: - if self._state not in [CANCELLED, CANCELLED_AND_NOTIFIED, - FINISHED]: - self._done_callbacks.append(fn) - return - fn(self) - - def result(self, timeout=None): - """Return the result of the call that the future represents. - - Args: - timeout: The number of seconds to wait for the result if the - future isn't done. 
If None, then there is no limit on the - wait time. - - Returns: - The result of the call that the future represents. - - Raises: - CancelledError: If the future was cancelled. - TimeoutError: If the future didn't finish executing before the - given timeout. - Exception: If the call raised then that exception will be - raised. - """ - with self._condition: - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self.__get_result() - - self._condition.wait(timeout) - - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self.__get_result() - else: - raise TimeoutError() - - def exception(self, timeout=None): - """Return the exception raised by the call that the future - represents. - - Args: - timeout: The number of seconds to wait for the exception if the - future isn't done. If None, then there is no limit on the - wait time. - - Returns: - The exception raised by the call that the future represents or - None if the call completed without raising. - - Raises: - CancelledError: If the future was cancelled. - TimeoutError: If the future didn't finish executing before the - given timeout. - """ - - with self._condition: - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self._exception - - self._condition.wait(timeout) - - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self._exception - else: - raise TimeoutError() - - # The following methods should only be used by Executors and in tests. - def set_running_or_notify_cancel(self): - """Mark the future as running or process any cancel notifications. - - Should only be used by Executor implementations and unit tests. - - If the future has been cancelled (cancel() was called and returned - True) then any threads waiting on the future completing (though - calls to as_completed() or wait()) are notified and False is - returned. - - If the future was not cancelled then it is put in the running state - (future calls to running() will return True) and True is returned. - - This method should be called by Executor implementations before - executing the work associated with this future. If this method - returns False then the work should not be executed. - - Returns: - False if the Future was cancelled, True otherwise. - - Raises: - RuntimeError: if this method was already called or if - set_result() or set_exception() was called. - """ - with self._condition: - if self._state == CANCELLED: - self._state = CANCELLED_AND_NOTIFIED - for waiter in self._waiters: - waiter.add_cancelled(self) - # self._condition.notify_all() is not necessary because - # self.cancel() triggers a notification. - return False - elif self._state == PENDING: - self._state = RUNNING - return True - else: - LOGGER.critical('Future %s in unexpected state: %s', - id(self), - self._state) - raise RuntimeError('Future in unexpected state') - - def set_result(self, result): - """Sets the return value of work associated with the future. - - Should only be used by Executor implementations and unit tests. - """ - with self._condition: - self._result = result - self._state = FINISHED - for waiter in self._waiters: - waiter.add_result(self) - self._condition.notify_all() - self._invoke_callbacks() - - def set_exception(self, exception): - """Sets the result of the future as being the given exception. 
- - Should only be used by Executor implementations and unit tests. - """ - with self._condition: - self._exception = exception - self._state = FINISHED - for waiter in self._waiters: - waiter.add_exception(self) - self._condition.notify_all() - self._invoke_callbacks() - - class Executor(object): - """This is an abstract base class for concrete asynchronous executors. - """ - - def submit(self, fn, *args, **kwargs): - """Submits a callable to be executed with the given arguments. - - Schedules the callable to be executed as fn(*args, **kwargs) and - returns a Future instance representing the execution of the - callable. - - Returns: - A Future representing the given call. - """ - raise NotImplementedError() - - def map(self, fn, *iterables, **kwargs): - """Returns an iterator equivalent to map(fn, iter). - - Args: - fn: A callable that will take as many arguments as there are - passed iterables. - timeout: The maximum number of seconds to wait. If None, then - there is no limit on the wait time. - chunksize: The size of the chunks the iterable will be broken - into before being passed to a child process. This argument - is only used by ProcessPoolExecutor; it is ignored by - ThreadPoolExecutor. - - Returns: - An iterator equivalent to: map(func, *iterables) but the calls - may be evaluated out-of-order. - - Raises: - TimeoutError: If the entire result iterator could not be - generated before the given timeout. - Exception: If fn(*args) raises for any values. - """ - timeout = kwargs.get('timeout') - if timeout is not None: - end_time = timeout + time.time() - - fs = [self.submit(fn, *args) for args in zip(*iterables)] - - # Yield must be hidden in closure so that the futures are submitted - # before the first iterator value is required. - def result_iterator(): - try: - for future in fs: - if timeout is None: - yield future.result() - else: - yield future.result(end_time - time.time()) - finally: - for future in fs: - future.cancel() - return result_iterator() - - def shutdown(self, wait=True): - """Clean-up the resources associated with the Executor. - - It is safe to call this method several times. Otherwise, no other - methods can be called after this one. - - Args: - wait: If True then shutdown will not return until all running - futures have finished executing and the resources used by - the executor have been reclaimed. - """ - pass - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.shutdown(wait=True) - return False +from concurrent.futures import Future as _BaseFuture +from concurrent.futures._base import LOGGER # To make loky._base.Future instances awaitable by concurrent.futures.wait, # derive our custom Future class from _BaseFuture. _invoke_callback is the only # modification made to this class in loky. +# TODO investigate why using `concurrent.futures.Future` directly does not +# always work in our test suite. 
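For illustration (not part of the patch), a sketch of what switching to the standard-library ``concurrent.futures`` primitives buys: futures returned by loky's executor can be passed straight to ``concurrent.futures.wait``. It assumes the vendored loky is importable as ``joblib.externals.loky``.

    from concurrent.futures import ALL_COMPLETED, wait

    from joblib.externals.loky import get_reusable_executor

    def square(x):
        return x * x

    if __name__ == "__main__":
        executor = get_reusable_executor(max_workers=2)
        futures = [executor.submit(square, i) for i in range(4)]
        # loky.Future derives from concurrent.futures.Future, so the standard
        # library waiting helpers accept it directly.
        done, _ = wait(futures, return_when=ALL_COMPLETED)
        print(sorted(f.result() for f in done))   # [0, 1, 4, 9]
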
class Future(_BaseFuture): def _invoke_callbacks(self): for callback in self._done_callbacks: try: callback(self) except BaseException: - LOGGER.exception('exception calling callback for %r', self) + LOGGER.exception(f'exception calling callback for {self!r}') diff --git a/joblib/externals/loky/backend/__init__.py b/joblib/externals/loky/backend/__init__.py index a65ce0e8b..c31023cc5 100644 --- a/joblib/externals/loky/backend/__init__.py +++ b/joblib/externals/loky/backend/__init__.py @@ -1,16 +1,14 @@ import os -import sys +from multiprocessing import synchronize from .context import get_context -if sys.version_info > (3, 4): - def _make_name(): - name = '/loky-%i-%s' % (os.getpid(), next(synchronize.SemLock._rand)) - return name +def _make_name(): + return f'/loky-{os.getpid()}-{next(synchronize.SemLock._rand)}' - # monkey patch the name creation for multiprocessing - from multiprocessing import synchronize - synchronize.SemLock._make_name = staticmethod(_make_name) + +# monkey patch the name creation for multiprocessing +synchronize.SemLock._make_name = staticmethod(_make_name) __all__ = ["get_context"] diff --git a/joblib/externals/loky/backend/_posix_reduction.py b/joblib/externals/loky/backend/_posix_reduction.py index e0e394d3c..e9f34ed56 100644 --- a/joblib/externals/loky/backend/_posix_reduction.py +++ b/joblib/externals/loky/backend/_posix_reduction.py @@ -7,18 +7,12 @@ # * Add adapted reduction for LokyProcesses and socket/Connection # import os -import sys import socket import _socket +from multiprocessing.connection import Connection +from multiprocessing.context import get_spawning_popen from .reduction import register -from .context import get_spawning_popen - -if sys.version_info >= (3, 3): - from multiprocessing.connection import Connection -else: - from _multiprocessing import Connection - HAVE_SEND_HANDLE = (hasattr(socket, 'CMSG_LEN') and hasattr(socket, 'SCM_RIGHTS') and @@ -26,8 +20,7 @@ def _mk_inheritable(fd): - if sys.version_info[:2] > (3, 3): - os.set_inheritable(fd, True) + os.set_inheritable(fd, True) return fd @@ -36,7 +29,7 @@ def DupFd(fd): popen_obj = get_spawning_popen() if popen_obj is not None: return popen_obj.DupFd(popen_obj.duplicate_for_child(fd)) - elif HAVE_SEND_HANDLE and sys.version_info[:2] > (3, 3): + elif HAVE_SEND_HANDLE: from multiprocessing import resource_sharer return resource_sharer.DupFd(fd) else: @@ -46,31 +39,26 @@ def DupFd(fd): ) -if sys.version_info[:2] != (3, 3): - def _reduce_socket(s): - df = DupFd(s.fileno()) - return _rebuild_socket, (df, s.family, s.type, s.proto) +def _reduce_socket(s): + df = DupFd(s.fileno()) + return _rebuild_socket, (df, s.family, s.type, s.proto) - def _rebuild_socket(df, family, type, proto): - fd = df.detach() - return socket.fromfd(fd, family, type, proto) -else: - from multiprocessing.reduction import reduce_socket as _reduce_socket +def _rebuild_socket(df, family, type, proto): + fd = df.detach() + return socket.fromfd(fd, family, type, proto) -register(socket.socket, _reduce_socket) -register(_socket.socket, _reduce_socket) +def rebuild_connection(df, readable, writable): + fd = df.detach() + return Connection(fd, readable, writable) -if sys.version_info[:2] != (3, 3): - def reduce_connection(conn): - df = DupFd(conn.fileno()) - return rebuild_connection, (df, conn.readable, conn.writable) - def rebuild_connection(df, readable, writable): - fd = df.detach() - return Connection(fd, readable, writable) -else: - from multiprocessing.reduction import reduce_connection +def reduce_connection(conn): + 
df = DupFd(conn.fileno()) + return rebuild_connection, (df, conn.readable, conn.writable) + +register(socket.socket, _reduce_socket) +register(_socket.socket, _reduce_socket) register(Connection, reduce_connection) diff --git a/joblib/externals/loky/backend/_posix_wait.py b/joblib/externals/loky/backend/_posix_wait.py deleted file mode 100644 index d935882dc..000000000 --- a/joblib/externals/loky/backend/_posix_wait.py +++ /dev/null @@ -1,105 +0,0 @@ -############################################################################### -# Compat for wait function on UNIX based system -# -# author: Thomas Moreau and Olivier Grisel -# -# adapted from multiprocessing/connection.py (17/02/2017) -# * Backport wait function to python2.7 -# - -import platform -import select -import socket -import errno -SYSTEM = platform.system() - -try: - import ctypes -except ImportError: # pragma: no cover - ctypes = None # noqa - -if SYSTEM == 'Darwin' and ctypes is not None: - from ctypes.util import find_library - libSystem = ctypes.CDLL(find_library('libSystem.dylib')) - CoreServices = ctypes.CDLL(find_library('CoreServices'), - use_errno=True) - mach_absolute_time = libSystem.mach_absolute_time - mach_absolute_time.restype = ctypes.c_uint64 - absolute_to_nanoseconds = CoreServices.AbsoluteToNanoseconds - absolute_to_nanoseconds.restype = ctypes.c_uint64 - absolute_to_nanoseconds.argtypes = [ctypes.c_uint64] - - def monotonic(): - return absolute_to_nanoseconds(mach_absolute_time()) * 1e-9 - -elif SYSTEM == 'Linux' and ctypes is not None: - # from stackoverflow: - # questions/1205722/how-do-i-get-monotonic-time-durations-in-python - import ctypes - import os - - CLOCK_MONOTONIC = 1 # see - - class timespec(ctypes.Structure): - _fields_ = [ - ('tv_sec', ctypes.c_long), - ('tv_nsec', ctypes.c_long), - ] - - librt = ctypes.CDLL('librt.so.1', use_errno=True) - clock_gettime = librt.clock_gettime - clock_gettime.argtypes = [ - ctypes.c_int, ctypes.POINTER(timespec), - ] - - def monotonic(): # noqa - t = timespec() - if clock_gettime(CLOCK_MONOTONIC, ctypes.pointer(t)) != 0: - errno_ = ctypes.get_errno() - raise OSError(errno_, os.strerror(errno_)) - return t.tv_sec + t.tv_nsec * 1e-9 -else: # pragma: no cover - from time import time as monotonic - - -if hasattr(select, 'poll'): - def _poll(fds, timeout): - if timeout is not None: - timeout = int(timeout * 1000) # timeout is in milliseconds - fd_map = {} - pollster = select.poll() - for fd in fds: - pollster.register(fd, select.POLLIN) - if hasattr(fd, 'fileno'): - fd_map[fd.fileno()] = fd - else: - fd_map[fd] = fd - ls = [] - for fd, event in pollster.poll(timeout): - if event & select.POLLNVAL: # pragma: no cover - raise ValueError('invalid file descriptor %i' % fd) - ls.append(fd_map[fd]) - return ls -else: - def _poll(fds, timeout): - return select.select(fds, [], [], timeout)[0] - - -def wait(object_list, timeout=None): - ''' - Wait till an object in object_list is ready/readable. - Returns list of those objects which are ready/readable. 
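The backported ``wait`` being deleted above is replaced elsewhere in this patch by the standard-library helper; a minimal sketch of the equivalent call, for reference only.

    from multiprocessing import Pipe
    from multiprocessing.connection import wait

    r, w = Pipe(duplex=False)
    w.send("ping")
    # wait() blocks until at least one object is ready for reading or the
    # timeout (in seconds) expires, and returns the list of ready objects.
    ready = wait([r], timeout=1.0)
    print([conn.recv() for conn in ready])   # ['ping']
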
- ''' - if timeout is not None: - if timeout <= 0: - return _poll(object_list, 0) - else: - deadline = monotonic() + timeout - while True: - try: - return _poll(object_list, timeout) - except (OSError, IOError, socket.error) as e: # pragma: no cover - if e.errno != errno.EINTR: - raise - if timeout is not None: - timeout = deadline - monotonic() diff --git a/joblib/externals/loky/backend/_win_reduction.py b/joblib/externals/loky/backend/_win_reduction.py index 142e6e7c8..7f50c9f61 100644 --- a/joblib/externals/loky/backend/_win_reduction.py +++ b/joblib/externals/loky/backend/_win_reduction.py @@ -7,93 +7,54 @@ # * Add adapted reduction for LokyProcesses and socket/PipeConnection # import os -import sys import socket -from .reduction import register - - -if sys.platform == 'win32': - if sys.version_info[:2] < (3, 3): - from _multiprocessing import PipeConnection - else: - import _winapi - from multiprocessing.connection import PipeConnection - - -if sys.version_info[:2] >= (3, 4) and sys.platform == 'win32': - class DupHandle(object): - def __init__(self, handle, access, pid=None): - # duplicate handle for process with given pid - if pid is None: - pid = os.getpid() - proc = _winapi.OpenProcess(_winapi.PROCESS_DUP_HANDLE, False, pid) - try: - self._handle = _winapi.DuplicateHandle( - _winapi.GetCurrentProcess(), - handle, proc, access, False, 0) - finally: - _winapi.CloseHandle(proc) - self._access = access - self._pid = pid - - def detach(self): - # retrieve handle from process which currently owns it - if self._pid == os.getpid(): - return self._handle - proc = _winapi.OpenProcess(_winapi.PROCESS_DUP_HANDLE, False, - self._pid) - try: - return _winapi.DuplicateHandle( - proc, self._handle, _winapi.GetCurrentProcess(), - self._access, False, _winapi.DUPLICATE_CLOSE_SOURCE) - finally: - _winapi.CloseHandle(proc) - - def reduce_pipe_connection(conn): - access = ((_winapi.FILE_GENERIC_READ if conn.readable else 0) | - (_winapi.FILE_GENERIC_WRITE if conn.writable else 0)) - dh = DupHandle(conn.fileno(), access) - return rebuild_pipe_connection, (dh, conn.readable, conn.writable) +import _winapi +from multiprocessing.connection import PipeConnection +from multiprocessing.reduction import _reduce_socket - def rebuild_pipe_connection(dh, readable, writable): - from multiprocessing.connection import PipeConnection - handle = dh.detach() - return PipeConnection(handle, readable, writable) - register(PipeConnection, reduce_pipe_connection) - -elif sys.platform == 'win32': - # Older Python versions - from multiprocessing.reduction import reduce_pipe_connection - register(PipeConnection, reduce_pipe_connection) - - -if sys.version_info[:2] < (3, 3) and sys.platform == 'win32': - from _multiprocessing import win32 - from multiprocessing.reduction import reduce_handle, rebuild_handle - close = win32.CloseHandle - - def fromfd(handle, family, type_, proto=0): - s = socket.socket(family, type_, proto, fileno=handle) - if s.__class__ is not socket.socket: - s = socket.socket(_sock=s) - return s - - def reduce_socket(s): - if not hasattr(socket, "fromfd"): - raise TypeError("sockets cannot be pickled on this system.") - reduced_handle = reduce_handle(s.fileno()) - return _rebuild_socket, (reduced_handle, s.family, s.type, s.proto) +from .reduction import register - def _rebuild_socket(reduced_handle, family, type_, proto): - handle = rebuild_handle(reduced_handle) - s = fromfd(handle, family, type_, proto) - close(handle) - return s - register(socket.socket, reduce_socket) -elif sys.version_info[:2] < (3, 
4): - from multiprocessing.reduction import reduce_socket - register(socket.socket, reduce_socket) -else: - from multiprocessing.reduction import _reduce_socket - register(socket.socket, _reduce_socket) +class DupHandle: + def __init__(self, handle, access, pid=None): + # duplicate handle for process with given pid + if pid is None: + pid = os.getpid() + proc = _winapi.OpenProcess(_winapi.PROCESS_DUP_HANDLE, False, pid) + try: + self._handle = _winapi.DuplicateHandle( + _winapi.GetCurrentProcess(), + handle, proc, access, False, 0) + finally: + _winapi.CloseHandle(proc) + self._access = access + self._pid = pid + + def detach(self): + # retrieve handle from process which currently owns it + if self._pid == os.getpid(): + return self._handle + proc = _winapi.OpenProcess(_winapi.PROCESS_DUP_HANDLE, False, + self._pid) + try: + return _winapi.DuplicateHandle( + proc, self._handle, _winapi.GetCurrentProcess(), + self._access, False, _winapi.DUPLICATE_CLOSE_SOURCE) + finally: + _winapi.CloseHandle(proc) + + +def rebuild_pipe_connection(dh, readable, writable): + handle = dh.detach() + return PipeConnection(handle, readable, writable) + + +def reduce_pipe_connection(conn): + access = ((_winapi.FILE_GENERIC_READ if conn.readable else 0) | + (_winapi.FILE_GENERIC_WRITE if conn.writable else 0)) + dh = DupHandle(conn.fileno(), access) + return rebuild_pipe_connection, (dh, conn.readable, conn.writable) + + +register(PipeConnection, reduce_pipe_connection) +register(socket.socket, _reduce_socket) diff --git a/joblib/externals/loky/backend/_win_wait.py b/joblib/externals/loky/backend/_win_wait.py deleted file mode 100644 index 73271316d..000000000 --- a/joblib/externals/loky/backend/_win_wait.py +++ /dev/null @@ -1,58 +0,0 @@ -############################################################################### -# Compat for wait function on Windows system -# -# author: Thomas Moreau and Olivier Grisel -# -# adapted from multiprocessing/connection.py (17/02/2017) -# * Backport wait function to python2.7 -# - -import ctypes -import sys -from time import sleep - - -if sys.platform == 'win32' and sys.version_info[:2] < (3, 3): - from _subprocess import WaitForSingleObject, WAIT_OBJECT_0 - - try: - from time import monotonic - except ImportError: - # Backward old for crappy old Python that did not have cross-platform - # monotonic clock by default. - - # TODO: do we want to add support for cygwin at some point? See: - # https://github.com/atdt/monotonic/blob/master/monotonic.py - GetTickCount64 = ctypes.windll.kernel32.GetTickCount64 - GetTickCount64.restype = ctypes.c_ulonglong - - def monotonic(): - """Monotonic clock, cannot go backward.""" - return GetTickCount64() / 1000.0 - - def wait(handles, timeout=None): - """Backward compat for python2.7 - - This function wait for either: - * one connection is ready for read, - * one process handle has exited or got killed, - * timeout is reached. Note that this function has a precision of 2 - msec. 
- """ - if timeout is not None: - deadline = monotonic() + timeout - - while True: - # We cannot use select as in windows it only support sockets - ready = [] - for h in handles: - if type(h) in [int, long]: - if WaitForSingleObject(h, 0) == WAIT_OBJECT_0: - ready += [h] - elif h.poll(0): - ready.append(h) - if len(ready) > 0: - return ready - sleep(.001) - if timeout is not None and deadline - monotonic() <= 0: - return [] diff --git a/joblib/externals/loky/backend/compat.py b/joblib/externals/loky/backend/compat.py deleted file mode 100644 index aa406c6cf..000000000 --- a/joblib/externals/loky/backend/compat.py +++ /dev/null @@ -1,41 +0,0 @@ -############################################################################### -# Compat file to import the correct modules for each platform and python -# version. -# -# author: Thomas Moreau and Olivier grisel -# -import sys - -PY3 = sys.version_info[:2] >= (3, 3) - -if PY3: - import queue -else: - import Queue as queue - -if sys.version_info >= (3, 4): - from multiprocessing.process import BaseProcess -else: - from multiprocessing.process import Process as BaseProcess - -# Platform specific compat -if sys.platform == "win32": - from .compat_win32 import wait -else: - from .compat_posix import wait - - -def set_cause(exc, cause): - exc.__cause__ = cause - - if not PY3: - # Preformat message here. - if exc.__cause__ is not None: - exc.args = ("{}\n\nThis was caused directly by {}".format( - exc.args if len(exc.args) != 1 else exc.args[0], - str(exc.__cause__)),) - - return exc - - -__all__ = ["queue", "BaseProcess", "set_cause", "wait"] diff --git a/joblib/externals/loky/backend/compat_posix.py b/joblib/externals/loky/backend/compat_posix.py deleted file mode 100644 index c8e4e4a43..000000000 --- a/joblib/externals/loky/backend/compat_posix.py +++ /dev/null @@ -1,13 +0,0 @@ -# flake8: noqa -############################################################################### -# Compat file to load the correct wait function -# -# author: Thomas Moreau and Olivier grisel -# -import sys - -# Compat wait -if sys.version_info < (3, 3): - from ._posix_wait import wait -else: - from multiprocessing.connection import wait diff --git a/joblib/externals/loky/backend/compat_win32.py b/joblib/externals/loky/backend/compat_win32.py deleted file mode 100644 index 5df15f55f..000000000 --- a/joblib/externals/loky/backend/compat_win32.py +++ /dev/null @@ -1,46 +0,0 @@ -# flake8: noqa: F401 -import sys -import numbers - -if sys.platform == "win32": - # Avoid import error by code introspection tools such as test runners - # trying to import this module while running on non-Windows systems. 
- - # Compat Popen - if sys.version_info[:2] >= (3, 4): - from multiprocessing.popen_spawn_win32 import Popen - else: - from multiprocessing.forking import Popen - - # wait compat - if sys.version_info[:2] < (3, 3): - from ._win_wait import wait - else: - from multiprocessing.connection import wait - - # Compat _winapi - if sys.version_info[:2] >= (3, 4): - import _winapi - else: - import os - import msvcrt - if sys.version_info[:2] < (3, 3): - import _subprocess as win_api - from _multiprocessing import win32 - else: - import _winapi as win_api - - class _winapi: - CreateProcess = win_api.CreateProcess - - @staticmethod - def CloseHandle(h): - if isinstance(h, numbers.Integral): - # Cast long to int for 64-bit Python 2.7 under Windows - h = int(h) - if sys.version_info[:2] < (3, 3): - if not isinstance(h, int): - h = h.Detach() - win32.CloseHandle(h) - else: - win_api.CloseHandle(h) diff --git a/joblib/externals/loky/backend/context.py b/joblib/externals/loky/backend/context.py index 76f6520d3..7e551688b 100644 --- a/joblib/externals/loky/backend/context.py +++ b/joblib/externals/loky/backend/context.py @@ -1,6 +1,5 @@ ############################################################################### -# Basic context management with LokyContext and provides -# compat for UNIX 2.7 and 3.3 +# Basic context management with LokyContext # # author: Thomas Moreau and Olivier Grisel # @@ -8,88 +7,46 @@ # * Create a context ensuring loky uses only objects that are compatible # * Add LokyContext to the list of context of multiprocessing so loky can be # used with multiprocessing.set_start_method -# * Add some compat function for python2.7 and 3.3. +# * Implement a CFS-aware amd physical-core aware cpu_count function. # -from __future__ import division - import os import sys +import math import subprocess import traceback import warnings import multiprocessing as mp - +from multiprocessing import get_context as mp_get_context +from multiprocessing.context import BaseContext from .process import LokyProcess, LokyInitMainProcess -START_METHODS = ['loky', 'loky_init_main'] +START_METHODS = ['loky', 'loky_init_main', 'spawn'] +if sys.platform != 'win32': + START_METHODS += ['fork', 'forkserver'] + _DEFAULT_START_METHOD = None # Cache for the number of physical cores to avoid repeating subprocess calls. # It should not change during the lifetime of the program. physical_cores_cache = None -if sys.version_info[:2] >= (3, 4): - from multiprocessing import get_context as mp_get_context - from multiprocessing.context import assert_spawning, set_spawning_popen - from multiprocessing.context import get_spawning_popen, BaseContext - - START_METHODS += ['spawn'] - if sys.platform != 'win32': - START_METHODS += ['fork', 'forkserver'] - - def get_context(method=None): - # Try to overload the default context - method = method or _DEFAULT_START_METHOD or "loky" - if method == "fork": - # If 'fork' is explicitly requested, warn user about potential - # issues. - warnings.warn("`fork` start method should not be used with " - "`loky` as it does not respect POSIX. Try using " - "`spawn` or `loky` instead.", UserWarning) - try: - context = mp_get_context(method) - except ValueError: - raise ValueError("Unknown context '{}'. Value should be in {}." 
- .format(method, START_METHODS)) - - return context - -else: - if sys.platform != 'win32': - import threading - # Mechanism to check that the current thread is spawning a process - _tls = threading.local() - popen_attr = 'spawning_popen' - else: - from multiprocessing.forking import Popen - _tls = Popen._tls - popen_attr = 'process_handle' - - BaseContext = object - - def get_spawning_popen(): - return getattr(_tls, popen_attr, None) - - def set_spawning_popen(popen): - setattr(_tls, popen_attr, popen) - - def assert_spawning(obj): - if get_spawning_popen() is None: - raise RuntimeError( - '%s objects should only be shared between processes' - ' through inheritance' % type(obj).__name__ - ) - - def get_context(method=None): - method = method or _DEFAULT_START_METHOD or 'loky' - if method == "loky": - return LokyContext() - elif method == "loky_init_main": - return LokyInitMainContext() - else: - raise ValueError("Unknown context '{}'. Value should be in {}." - .format(method, START_METHODS)) + +def get_context(method=None): + # Try to overload the default context + method = method or _DEFAULT_START_METHOD or "loky" + if method == "fork": + # If 'fork' is explicitly requested, warn user about potential issues. + warnings.warn("`fork` start method should not be used with " + "`loky` as it does not respect POSIX. Try using " + "`spawn` or `loky` instead.", UserWarning) + try: + return mp_get_context(method) + except ValueError: + raise ValueError( + f"Unknown context '{method}'. Value should be in " + f"{START_METHODS}." + ) def set_start_method(method, force=False): @@ -97,8 +54,9 @@ def set_start_method(method, force=False): if _DEFAULT_START_METHOD is not None and not force: raise RuntimeError('context has already been set') assert method is None or method in START_METHODS, ( - "'{}' is not a valid start_method. It should be in {}" - .format(method, START_METHODS)) + f"'{method}' is not a valid start_method. It should be in " + f"{START_METHODS}" + ) _DEFAULT_START_METHOD = method @@ -114,8 +72,8 @@ def cpu_count(only_physical_cores=False): * the number of CPUs in the system, as given by ``multiprocessing.cpu_count``; * the CPU affinity settings of the current process - (available with Python 3.4+ on some Unix systems); - * CFS scheduler CPU bandwidth limit (available on Linux only, typically + (available on some Unix systems); + * Cgroup CPU bandwidth limit (available on Linux only, typically set by docker and similar container orchestration systems); * the value of the LOKY_MAX_CPU_COUNT environment variable if defined. and is given as the minimum of these constraints. @@ -123,81 +81,95 @@ def cpu_count(only_physical_cores=False): If ``only_physical_cores`` is True, return the number of physical cores instead of the number of logical cores (hyperthreading / SMT). Note that this option is not enforced if the number of usable cores is controlled in - any other way such as: process affinity, restricting CFS scheduler policy + any other way such as: process affinity, Cgroup restricted CPU bandwidth or the LOKY_MAX_CPU_COUNT environment variable. If the number of physical cores is not found, return the number of logical cores. - + It is also always larger or equal to 1. 
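To make the aggregation described in the docstring above concrete, a rough sketch with invented quota/period numbers; the real implementation also reads the cgroup files and the process CPU affinity, which are left out here for brevity.

    import math
    import os

    os_cpu_count = os.cpu_count() or 1                 # logical CPUs
    cpu_quota_us, cpu_period_us = 150_000, 100_000     # e.g. docker --cpus=1.5
    cgroup_limit = math.ceil(cpu_quota_us / cpu_period_us)   # -> 2
    env_limit = int(os.environ.get("LOKY_MAX_CPU_COUNT", os_cpu_count))

    # cpu_count() returns the most restrictive of the constraints,
    # never going below 1.
    print(max(min(os_cpu_count, cgroup_limit, env_limit), 1))
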
""" - # TODO: use os.cpu_count when dropping python 2 support - try: - cpu_count_mp = mp.cpu_count() - except NotImplementedError: - cpu_count_mp = 1 - - cpu_count_user = _cpu_count_user(cpu_count_mp) - aggregate_cpu_count = min(cpu_count_mp, cpu_count_user) - - if only_physical_cores: - cpu_count_physical, exception = _count_physical_cores() - if cpu_count_user < cpu_count_mp: - # Respect user setting - cpu_count = max(cpu_count_user, 1) - elif cpu_count_physical == "not found": - # Fallback to default behavior - if exception is not None: - # warns only the first time - warnings.warn( - "Could not find the number of physical cores for the " - "following reason:\n" + str(exception) + "\n" - "Returning the number of logical cores instead. You can " - "silence this warning by setting LOKY_MAX_CPU_COUNT to " - "the number of cores you want to use.") - if sys.version_info >= (3, 5): - # TODO remove the version check when dropping py2 support - traceback.print_tb(exception.__traceback__) - - cpu_count = max(aggregate_cpu_count, 1) - else: - return cpu_count_physical + # Note: os.cpu_count() is allowed to return None in its docstring + os_cpu_count = os.cpu_count() or 1 + + cpu_count_user = _cpu_count_user(os_cpu_count) + aggregate_cpu_count = max(min(os_cpu_count, cpu_count_user), 1) + + if not only_physical_cores: + return aggregate_cpu_count + + if cpu_count_user < os_cpu_count: + # Respect user setting + return max(cpu_count_user, 1) + + cpu_count_physical, exception = _count_physical_cores() + if cpu_count_physical != "not found": + return cpu_count_physical + + # Fallback to default behavior + if exception is not None: + # warns only the first time + warnings.warn( + "Could not find the number of physical cores for the " + f"following reason:\n{exception}\n" + "Returning the number of logical cores instead. You can " + "silence this warning by setting LOKY_MAX_CPU_COUNT to " + "the number of cores you want to use.") + traceback.print_tb(exception.__traceback__) + + return aggregate_cpu_count + + +def _cpu_count_cgroup(os_cpu_count): + # Cgroup CPU bandwidth limit available in Linux since 2.6 kernel + cpu_max_fname = "/sys/fs/cgroup/cpu.max" + cfs_quota_fname = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us" + cfs_period_fname = "/sys/fs/cgroup/cpu/cpu.cfs_period_us" + if os.path.exists(cpu_max_fname): + # cgroup v2 + # https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html + with open(cpu_max_fname) as fh: + cpu_quota_us, cpu_period_us = fh.read().strip().split() + elif os.path.exists(cfs_quota_fname) and os.path.exists(cfs_period_fname): + # cgroup v1 + # https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html#management + with open(cfs_quota_fname) as fh: + cpu_quota_us = fh.read().strip() + with open(cfs_period_fname) as fh: + cpu_period_us = fh.read().strip() else: - cpu_count = max(aggregate_cpu_count, 1) + # No Cgroup CPU bandwidth limit (e.g. 
non-Linux platform) + cpu_quota_us = "max" + cpu_period_us = 100_000 # unused, for consistency with default values - return cpu_count + if cpu_quota_us == "max": + # No active Cgroup quota on a Cgroup-capable platform + return os_cpu_count + else: + cpu_quota_us = int(cpu_quota_us) + cpu_period_us = int(cpu_period_us) + if cpu_quota_us > 0 and cpu_period_us > 0: + return math.ceil(cpu_quota_us / cpu_period_us) + else: # pragma: no cover + # Setting a negative cpu_quota_us value is a valid way to disable + # cgroup CPU bandwith limits + return os_cpu_count -def _cpu_count_user(cpu_count_mp): +def _cpu_count_user(os_cpu_count): """Number of user defined available CPUs""" - import math - # Number of available CPUs given affinity settings - cpu_count_affinity = cpu_count_mp + cpu_count_affinity = os_cpu_count if hasattr(os, 'sched_getaffinity'): try: cpu_count_affinity = len(os.sched_getaffinity(0)) except NotImplementedError: pass - # CFS scheduler CPU bandwidth limit - # available in Linux since 2.6 kernel - cpu_count_cfs = cpu_count_mp - cfs_quota_fname = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us" - cfs_period_fname = "/sys/fs/cgroup/cpu/cpu.cfs_period_us" - if os.path.exists(cfs_quota_fname) and os.path.exists(cfs_period_fname): - with open(cfs_quota_fname, 'r') as fh: - cfs_quota_us = int(fh.read()) - with open(cfs_period_fname, 'r') as fh: - cfs_period_us = int(fh.read()) - - if cfs_quota_us > 0 and cfs_period_us > 0: - # Make sure this quantity is an int as math.ceil returns a - # float in python2.7. (See issue #165) - cpu_count_cfs = int(math.ceil(cfs_quota_us / cfs_period_us)) + cpu_count_cgroup = _cpu_count_cgroup(os_cpu_count) # User defined soft-limit passed as a loky specific environment variable. - cpu_count_loky = int(os.environ.get('LOKY_MAX_CPU_COUNT', cpu_count_mp)) + cpu_count_loky = int(os.environ.get('LOKY_MAX_CPU_COUNT', os_cpu_count)) - return min(cpu_count_affinity, cpu_count_cfs, cpu_count_loky) + return min(cpu_count_affinity, cpu_count_cgroup, cpu_count_loky) def _count_physical_cores(): @@ -219,39 +191,39 @@ def _count_physical_cores(): try: if sys.platform == "linux": cpu_info = subprocess.run( - "lscpu --parse=core".split(" "), capture_output=True) - cpu_info = cpu_info.stdout.decode("utf-8").splitlines() + "lscpu --parse=core".split(), capture_output=True, text=True) + cpu_info = cpu_info.stdout.splitlines() cpu_info = {line for line in cpu_info if not line.startswith("#")} cpu_count_physical = len(cpu_info) elif sys.platform == "win32": cpu_info = subprocess.run( - "wmic CPU Get NumberOfCores /Format:csv".split(" "), - capture_output=True) - cpu_info = cpu_info.stdout.decode('utf-8').splitlines() + "wmic CPU Get NumberOfCores /Format:csv".split(), + capture_output=True, text=True) + cpu_info = cpu_info.stdout.splitlines() cpu_info = [l.split(",")[1] for l in cpu_info if (l and l != "Node,NumberOfCores")] cpu_count_physical = sum(map(int, cpu_info)) elif sys.platform == "darwin": cpu_info = subprocess.run( - "sysctl -n hw.physicalcpu".split(" "), capture_output=True) - cpu_info = cpu_info.stdout.decode('utf-8') + "sysctl -n hw.physicalcpu".split(), + capture_output=True, text=True) + cpu_info = cpu_info.stdout cpu_count_physical = int(cpu_info) else: - raise NotImplementedError( - "unsupported platform: {}".format(sys.platform)) + raise NotImplementedError(f"unsupported platform: {sys.platform}") # if cpu_count_physical < 1, we did not find a valid value if cpu_count_physical < 1: raise ValueError( - "found {} physical cores < 1".format(cpu_count_physical)) - + 
f"found {cpu_count_physical} physical cores < 1") + except Exception as e: exception = e cpu_count_physical = "not found" # Put the result in cache physical_cores_cache = cpu_count_physical - + return cpu_count_physical, exception @@ -272,44 +244,10 @@ def SimpleQueue(self, reducers=None): from .queues import SimpleQueue return SimpleQueue(reducers=reducers, ctx=self.get_context()) - if sys.version_info[:2] < (3, 4): - """Compat for python2.7/3.3 for necessary methods in Context""" - def get_context(self): - return self - - def get_start_method(self): - return self._name - - def Pipe(self, duplex=True): - '''Returns two connection object connected by a pipe''' - return mp.Pipe(duplex) - - if sys.platform != "win32": - """Use the compat Manager for python2.7/3.3 on UNIX to avoid - relying on fork processes - """ - def Manager(self): - """Returns a manager object""" - from .managers import LokyManager - m = LokyManager() - m.start() - return m - else: - """Compat for context on Windows and python2.7/3.3. Using regular - multiprocessing objects as it does not rely on fork. - """ - from multiprocessing import synchronize - Semaphore = staticmethod(synchronize.Semaphore) - BoundedSemaphore = staticmethod(synchronize.BoundedSemaphore) - Lock = staticmethod(synchronize.Lock) - RLock = staticmethod(synchronize.RLock) - Condition = staticmethod(synchronize.Condition) - Event = staticmethod(synchronize.Event) - Manager = staticmethod(mp.Manager) - if sys.platform != "win32": """For Unix platform, use our custom implementation of synchronize - relying on ctypes to interface with pthread semaphores. + ensuring that we use the loky.backend.resource_tracker to clean-up + the semaphores in case of a worker crash. """ def Semaphore(self, value=1): """Returns a semaphore object""" @@ -352,7 +290,7 @@ class LokyInitMainContext(LokyContext): functions and variable used from main should be out of this block. This mimics the default behavior of multiprocessing under Windows and the - behavior of the ``spawn`` start method on a posix system for python3.4+. + behavior of the ``spawn`` start method on a posix system. 
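A minimal usage sketch of the spawn-like behaviour the ``LokyInitMainContext`` docstring describes; the ``'loky_init_main'`` name comes from ``START_METHODS`` above, everything else is illustrative.

    from joblib.externals.loky.backend.context import get_context

    def announce():
        print("running in a loky worker")

    if __name__ == "__main__":
        # Like 'spawn', 'loky_init_main' re-imports the main module in the
        # child process, so code with side effects belongs under this guard.
        ctx = get_context("loky_init_main")
        p = ctx.Process(target=announce)
        p.start()
        p.join()
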
For more details, see the end of the following section of python doc https://docs.python.org/3/library/multiprocessing.html#multiprocessing-programming """ @@ -360,8 +298,7 @@ class LokyInitMainContext(LokyContext): Process = LokyInitMainProcess -if sys.version_info > (3, 4): - """Register loky context so it works with multiprocessing.get_context""" - ctx_loky = LokyContext() - mp.context._concrete_contexts['loky'] = ctx_loky - mp.context._concrete_contexts['loky_init_main'] = LokyInitMainContext() +# Register loky context so it works with multiprocessing.get_context +ctx_loky = LokyContext() +mp.context._concrete_contexts['loky'] = ctx_loky +mp.context._concrete_contexts['loky_init_main'] = LokyInitMainContext() diff --git a/joblib/externals/loky/backend/fork_exec.py b/joblib/externals/loky/backend/fork_exec.py index cfb68dc4e..211d1835a 100644 --- a/joblib/externals/loky/backend/fork_exec.py +++ b/joblib/externals/loky/backend/fork_exec.py @@ -7,24 +7,20 @@ import os import sys -if sys.platform == "darwin" and sys.version_info < (3, 3): - FileNotFoundError = OSError - def close_fds(keep_fds): # pragma: no cover """Close all the file descriptors except those in keep_fds.""" # Make sure to keep stdout and stderr open for logging purpose - keep_fds = set(keep_fds).union([1, 2]) + keep_fds = {*keep_fds, 1, 2} # We try to retrieve all the open fds try: - open_fds = set(int(fd) for fd in os.listdir('/proc/self/fd')) + open_fds = {int(fd) for fd in os.listdir('/proc/self/fd')} except FileNotFoundError: import resource max_nfds = resource.getrlimit(resource.RLIMIT_NOFILE)[0] - open_fds = set(fd for fd in range(3, max_nfds)) - open_fds.add(0) + open_fds = {*range(max_nfds)} for i in open_fds - keep_fds: try: @@ -34,11 +30,9 @@ def close_fds(keep_fds): # pragma: no cover def fork_exec(cmd, keep_fds, env=None): - # copy the environment variables to set in the child process - env = {} if env is None else env - child_env = os.environ.copy() - child_env.update(env) + env = env or {} + child_env = {**os.environ, **env} pid = os.fork() if pid == 0: # pragma: no cover diff --git a/joblib/externals/loky/backend/managers.py b/joblib/externals/loky/backend/managers.py deleted file mode 100644 index 081f8976e..000000000 --- a/joblib/externals/loky/backend/managers.py +++ /dev/null @@ -1,51 +0,0 @@ -############################################################################### -# compat for UNIX 2.7 and 3.3 -# Manager with LokyContext server. -# This avoids having a Manager using fork and breaks the fd. 
-# -# author: Thomas Moreau and Olivier Grisel -# -# based on multiprocessing/managers.py (17/02/2017) -# * Overload the start method to use LokyContext and launch a loky subprocess -# - -import multiprocessing as mp -from multiprocessing.managers import SyncManager, State -from .process import LokyProcess as Process - - -class LokyManager(SyncManager): - def start(self, initializer=None, initargs=()): - '''Spawn a server process for this manager object''' - assert self._state.value == State.INITIAL - - if (initializer is not None - and not hasattr(initializer, '__call__')): - raise TypeError('initializer must be a callable') - - # pipe over which we will retrieve address of server - reader, writer = mp.Pipe(duplex=False) - - # spawn process which runs a server - self._process = Process( - target=type(self)._run_server, - args=(self._registry, self._address, bytes(self._authkey), - self._serializer, writer, initializer, initargs), - ) - ident = ':'.join(str(i) for i in self._process._identity) - self._process.name = type(self).__name__ + '-' + ident - self._process.start() - - # get address of server - writer.close() - self._address = reader.recv() - reader.close() - - # register a finalizer - self._state.value = State.STARTED - self.shutdown = mp.util.Finalize( - self, type(self)._finalize_manager, - args=(self._process, self._address, self._authkey, - self._state, self._Client), - exitpriority=0 - ) diff --git a/joblib/externals/loky/backend/popen_loky_posix.py b/joblib/externals/loky/backend/popen_loky_posix.py index 970dead0b..37a73172e 100644 --- a/joblib/externals/loky/backend/popen_loky_posix.py +++ b/joblib/externals/loky/backend/popen_loky_posix.py @@ -8,173 +8,141 @@ import signal import pickle from io import BytesIO - -from . import reduction, spawn -from .context import get_spawning_popen, set_spawning_popen from multiprocessing import util, process +from multiprocessing.connection import wait +from multiprocessing.context import set_spawning_popen -if sys.version_info[:2] < (3, 3): - ProcessLookupError = OSError - -if sys.platform != "win32": - from . import resource_tracker - - -__all__ = [] - -if sys.platform != "win32": - # - # Wrapper for an fd used while launching a process - # - - class _DupFd(object): - def __init__(self, fd): - self.fd = reduction._mk_inheritable(fd) - - def detach(self): - return self.fd - - # - # Start child process using subprocess.Popen - # - - __all__.append('Popen') - - class Popen(object): - method = 'loky' - DupFd = _DupFd - - def __init__(self, process_obj): - sys.stdout.flush() - sys.stderr.flush() - self.returncode = None - self._fds = [] - self._launch(process_obj) - - if sys.version_info < (3, 4): - @classmethod - def duplicate_for_child(cls, fd): - popen = get_spawning_popen() - popen._fds.append(fd) - return reduction._mk_inheritable(fd) - - else: - def duplicate_for_child(self, fd): - self._fds.append(fd) - return reduction._mk_inheritable(fd) - - def poll(self, flag=os.WNOHANG): - if self.returncode is None: - while True: - try: - pid, sts = os.waitpid(self.pid, flag) - except OSError: - # Child process not yet created. 
See #1731717 - # e.errno == errno.ECHILD == 10 - return None - else: - break - if pid == self.pid: - if os.WIFSIGNALED(sts): - self.returncode = -os.WTERMSIG(sts) - else: - assert os.WIFEXITED(sts) - self.returncode = os.WEXITSTATUS(sts) - return self.returncode - - def wait(self, timeout=None): - if sys.version_info < (3, 3): - import time - if timeout is None: - return self.poll(0) - deadline = time.time() + timeout - delay = 0.0005 - while 1: - res = self.poll() - if res is not None: - break - remaining = deadline - time.time() - if remaining <= 0: - break - delay = min(delay * 2, remaining, 0.05) - time.sleep(delay) - return res - - if self.returncode is None: - if timeout is not None: - from multiprocessing.connection import wait - if not wait([self.sentinel], timeout): - return None - # This shouldn't block if wait() returned successfully. - return self.poll(os.WNOHANG if timeout == 0.0 else 0) - return self.returncode - - def terminate(self): - if self.returncode is None: - try: - os.kill(self.pid, signal.SIGTERM) - except ProcessLookupError: - pass - except OSError: - if self.wait(timeout=0.1) is None: - raise +from . import reduction, resource_tracker, spawn - def _launch(self, process_obj): - tracker_fd = resource_tracker._resource_tracker.getfd() +__all__ = ['Popen'] - fp = BytesIO() - set_spawning_popen(self) - try: - prep_data = spawn.get_preparation_data( - process_obj._name, - getattr(process_obj, "init_main_module", True)) - reduction.dump(prep_data, fp) - reduction.dump(process_obj, fp) - finally: - set_spawning_popen(None) +# +# Wrapper for an fd used while launching a process +# - try: - parent_r, child_w = os.pipe() - child_r, parent_w = os.pipe() - # for fd in self._fds: - # _mk_inheritable(fd) - - cmd_python = [sys.executable] - cmd_python += ['-m', self.__module__] - cmd_python += ['--process-name', str(process_obj.name)] - cmd_python += ['--pipe', - str(reduction._mk_inheritable(child_r))] - reduction._mk_inheritable(child_w) - reduction._mk_inheritable(tracker_fd) - self._fds.extend([child_r, child_w, tracker_fd]) - if sys.version_info >= (3, 8) and os.name == 'posix': - mp_tracker_fd = prep_data['mp_tracker_args']['fd'] - self.duplicate_for_child(mp_tracker_fd) - - from .fork_exec import fork_exec - pid = fork_exec(cmd_python, self._fds, env=process_obj.env) - util.debug("launched python with pid {} and cmd:\n{}" - .format(pid, cmd_python)) - self.sentinel = parent_r - - method = 'getbuffer' - if not hasattr(fp, method): - method = 'getvalue' - with os.fdopen(parent_w, 'wb') as f: - f.write(getattr(fp, method)()) - self.pid = pid - finally: - if parent_r is not None: - util.Finalize(self, os.close, (parent_r,)) - for fd in (child_r, child_w): - if fd is not None: - os.close(fd) +class _DupFd: + def __init__(self, fd): + self.fd = reduction._mk_inheritable(fd) - @staticmethod - def thread_is_spawning(): - return True + def detach(self): + return self.fd + + +# +# Start child process using subprocess.Popen +# + +class Popen: + method = 'loky' + DupFd = _DupFd + + def __init__(self, process_obj): + sys.stdout.flush() + sys.stderr.flush() + self.returncode = None + self._fds = [] + self._launch(process_obj) + + def duplicate_for_child(self, fd): + self._fds.append(fd) + return reduction._mk_inheritable(fd) + + def poll(self, flag=os.WNOHANG): + if self.returncode is None: + while True: + try: + pid, sts = os.waitpid(self.pid, flag) + except OSError: + # Child process not yet created. 
See #1731717 + # e.errno == errno.ECHILD == 10 + return None + else: + break + if pid == self.pid: + if os.WIFSIGNALED(sts): + self.returncode = -os.WTERMSIG(sts) + else: + assert os.WIFEXITED(sts) + self.returncode = os.WEXITSTATUS(sts) + return self.returncode + + def wait(self, timeout=None): + if self.returncode is None: + if timeout is not None: + if not wait([self.sentinel], timeout): + return None + # This shouldn't block if wait() returned successfully. + return self.poll(os.WNOHANG if timeout == 0.0 else 0) + return self.returncode + + def terminate(self): + if self.returncode is None: + try: + os.kill(self.pid, signal.SIGTERM) + except ProcessLookupError: + pass + except OSError: + if self.wait(timeout=0.1) is None: + raise + + def _launch(self, process_obj): + + tracker_fd = resource_tracker._resource_tracker.getfd() + + fp = BytesIO() + set_spawning_popen(self) + try: + prep_data = spawn.get_preparation_data( + process_obj._name, + getattr(process_obj, "init_main_module", True)) + reduction.dump(prep_data, fp) + reduction.dump(process_obj, fp) + + finally: + set_spawning_popen(None) + + try: + parent_r, child_w = os.pipe() + child_r, parent_w = os.pipe() + # for fd in self._fds: + # _mk_inheritable(fd) + + cmd_python = [sys.executable] + cmd_python += ['-m', self.__module__] + cmd_python += ['--process-name', str(process_obj.name)] + cmd_python += ['--pipe', str(reduction._mk_inheritable(child_r))] + reduction._mk_inheritable(child_w) + reduction._mk_inheritable(tracker_fd) + self._fds += [child_r, child_w, tracker_fd] + if sys.version_info >= (3, 8) and os.name == 'posix': + mp_tracker_fd = prep_data['mp_tracker_args']['fd'] + self.duplicate_for_child(mp_tracker_fd) + + from .fork_exec import fork_exec + pid = fork_exec(cmd_python, self._fds, env=process_obj.env) + util.debug( + f"launched python with pid {pid} and cmd:\n{cmd_python}" + ) + self.sentinel = parent_r + + method = 'getbuffer' + if not hasattr(fp, method): + method = 'getvalue' + with os.fdopen(parent_w, 'wb') as f: + f.write(getattr(fp, method)()) + self.pid = pid + finally: + if parent_r is not None: + util.Finalize(self, os.close, (parent_r,)) + for fd in (child_r, child_w): + if fd is not None: + os.close(fd) + + @staticmethod + def thread_is_spawning(): + return True if __name__ == '__main__': @@ -187,8 +155,7 @@ def thread_is_spawning(): args = parser.parse_args() - info = dict() - + info = {} exitcode = 1 try: with os.fdopen(args.pipe, 'rb') as from_parent: @@ -203,7 +170,7 @@ def thread_is_spawning(): exitcode = process_obj._bootstrap() except Exception: print('\n\n' + '-' * 80) - print('{} failed with traceback: '.format(args.process_name)) + print(f'{args.process_name} failed with traceback: ') print('-' * 80) import traceback print(traceback.format_exc()) diff --git a/joblib/externals/loky/backend/popen_loky_win32.py b/joblib/externals/loky/backend/popen_loky_win32.py index 523bd078c..e2702a724 100644 --- a/joblib/externals/loky/backend/popen_loky_win32.py +++ b/joblib/externals/loky/backend/popen_loky_win32.py @@ -1,24 +1,15 @@ import os import sys +import msvcrt +import _winapi from pickle import load from multiprocessing import process, util +from multiprocessing.context import get_spawning_popen, set_spawning_popen +from multiprocessing.popen_spawn_win32 import Popen as _Popen +from multiprocessing.reduction import duplicate -from . import spawn -from . import reduction -from .context import get_spawning_popen, set_spawning_popen +from . 
import reduction, spawn -if sys.platform == "win32": - # Avoid import error by code introspection tools such as test runners - # trying to import this module while running on non-Windows systems. - import msvcrt - from .compat_win32 import _winapi - from .compat_win32 import Popen as _Popen - from .reduction import duplicate -else: - _Popen = object - -if sys.version_info[:2] < (3, 3): - from os import fdopen as open __all__ = ['Popen'] @@ -26,10 +17,6 @@ # # -TERMINATE = 0x10000 -WINEXE = (sys.platform == 'win32' and getattr(sys, 'frozen', False)) -WINSERVICE = sys.executable.lower().endswith("pythonservice.exe") - def _path_eq(p1, p2): return p1 == p2 or os.path.normcase(p1) == os.path.normcase(p2) @@ -61,13 +48,12 @@ def __init__(self, process_obj): os.close(rfd) cmd = get_command_line(parent_pid=os.getpid(), pipe_handle=rhandle) - cmd = ' '.join('"%s"' % x for x in cmd) + cmd = ' '.join(f'"{x}"' for x in cmd) python_exe = spawn.get_executable() # copy the environment variables to set in the child process - child_env = os.environ.copy() - child_env.update(process_obj.env) + child_env = {**os.environ, **process_obj.env} # bpo-35797: When running in a venv, we bypass the redirect # executor and launch our base Python. @@ -87,7 +73,7 @@ def __init__(self, process_obj): # the cleaner multiprocessing.reduction.steal_handle should # be used instead. inherit = True - hp, ht, pid, tid = _winapi.CreateProcess( + hp, ht, pid, _ = _winapi.CreateProcess( python_exe, cmd, None, None, inherit, 0, child_env, None, None) @@ -105,22 +91,19 @@ def __init__(self, process_obj): # send information to child set_spawning_popen(self) - if sys.version_info[:2] < (3, 4): - Popen._tls.process_handle = int(hp) try: reduction.dump(prep_data, to_child) reduction.dump(process_obj, to_child) finally: set_spawning_popen(None) - if sys.version_info[:2] < (3, 4): - del Popen._tls.process_handle except IOError as exc: # IOError 22 happens when the launched subprocess terminated before # wfd.close is called. Thus we can safely ignore it. 
if exc.errno != 22: raise - util.debug("While starting {}, ignored a IOError 22" - .format(process_obj._name)) + util.debug( + f"While starting {process_obj._name}, ignored a IOError 22" + ) def duplicate_for_child(self, handle): assert self is get_spawning_popen() @@ -132,12 +115,12 @@ def get_command_line(pipe_handle, **kwds): Returns prefix of command line used for spawning a child process ''' if getattr(sys, 'frozen', False): - return ([sys.executable, '--multiprocessing-fork', pipe_handle]) + return [sys.executable, '--multiprocessing-fork', pipe_handle] else: prog = 'from joblib.externals.loky.backend.popen_loky_win32 import main; main()' opts = util._args_from_interpreter_flags() - return [spawn.get_executable()] + opts + [ - '-c', prog, '--multiprocessing-fork', pipe_handle] + return [spawn.get_executable(), *opts, + '-c', prog, '--multiprocessing-fork', pipe_handle] def is_forking(argv): @@ -170,4 +153,4 @@ def main(): from_parent.close() exitcode = self._bootstrap() - exit(exitcode) + sys.exit(exitcode) diff --git a/joblib/externals/loky/backend/process.py b/joblib/externals/loky/backend/process.py index 30a20c061..eafde66d0 100644 --- a/joblib/externals/loky/backend/process.py +++ b/joblib/externals/loky/backend/process.py @@ -4,11 +4,10 @@ # authors: Thomas Moreau and Olivier Grisel # # based on multiprocessing/process.py (17/02/2017) -# * Add some compatibility function for python2.7 and 3.3 # -import os import sys -from .compat import BaseProcess +from multiprocessing.context import assert_spawning +from multiprocessing.process import BaseProcess class LokyProcess(BaseProcess): @@ -17,15 +16,9 @@ class LokyProcess(BaseProcess): def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, daemon=None, init_main_module=False, env=None): - if sys.version_info < (3, 3): - super(LokyProcess, self).__init__( - group=group, target=target, name=name, args=args, - kwargs=kwargs) - self.daemon = daemon - else: - super(LokyProcess, self).__init__( - group=group, target=target, name=name, args=args, - kwargs=kwargs, daemon=daemon) + super().__init__( + group=group, target=target, name=name, args=args, + kwargs=kwargs, daemon=daemon) self.env = {} if env is None else env self.authkey = self.authkey self.init_main_module = init_main_module @@ -38,55 +31,13 @@ def _Popen(process_obj): from .popen_loky_posix import Popen return Popen(process_obj) - if sys.version_info < (3, 3): - def start(self): - ''' - Start child process - ''' - from multiprocessing.process import _current_process, _cleanup - assert self._popen is None, 'cannot start a process twice' - assert self._parent_pid == os.getpid(), \ - 'can only start a process object created by current process' - _cleanup() - self._popen = self._Popen(self) - self._sentinel = self._popen.sentinel - _current_process._children.add(self) - - @property - def sentinel(self): - ''' - Return a file descriptor (Unix) or handle (Windows) suitable for - waiting for process termination. 
- ''' - try: - return self._sentinel - except AttributeError: - raise ValueError("process not started") - - if sys.version_info < (3, 4): - @property - def authkey(self): - return self._authkey - - @authkey.setter - def authkey(self, authkey): - ''' - Set authorization key of process - ''' - self._authkey = AuthenticationKey(authkey) - - def _bootstrap(self): - from .context import set_start_method - set_start_method(self._start_method) - super(LokyProcess, self)._bootstrap() - class LokyInitMainProcess(LokyProcess): _start_method = 'loky_init_main' def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, daemon=None): - super(LokyInitMainProcess, self).__init__( + super().__init__( group=group, target=target, name=name, args=args, kwargs=kwargs, daemon=daemon, init_main_module=True) @@ -97,7 +48,6 @@ def __init__(self, group=None, target=None, name=None, args=(), class AuthenticationKey(bytes): def __reduce__(self): - from .context import assert_spawning try: assert_spawning(self) except RuntimeError: diff --git a/joblib/externals/loky/backend/queues.py b/joblib/externals/loky/backend/queues.py index 62735db3a..4113b89fb 100644 --- a/joblib/externals/loky/backend/queues.py +++ b/joblib/externals/loky/backend/queues.py @@ -4,8 +4,6 @@ # authors: Thomas Moreau, Olivier Grisel # # based on multiprocessing/queues.py (16/02/2017) -# * Add some compatibility function for python2.7 and 3.3 and makes sure -# it uses the right synchronization primitive. # * Add some custom reducers for the Queues/SimpleQueue to tweak the # pickling process. (overload Queue._feed/SimpleQueue.put) # @@ -14,16 +12,16 @@ import errno import weakref import threading - from multiprocessing import util -from multiprocessing import connection -from multiprocessing.synchronize import SEM_VALUE_MAX -from multiprocessing.queues import Full -from multiprocessing.queues import _sentinel, Queue as mp_Queue -from multiprocessing.queues import SimpleQueue as mp_SimpleQueue +from multiprocessing.queues import ( + Full, + Queue as mp_Queue, + SimpleQueue as mp_SimpleQueue, + _sentinel, +) +from multiprocessing.context import assert_spawning -from .reduction import loads, dumps -from .context import assert_spawning, get_context +from .reduction import dumps __all__ = ['Queue', 'SimpleQueue', 'Full'] @@ -32,33 +30,7 @@ class Queue(mp_Queue): def __init__(self, maxsize=0, reducers=None, ctx=None): - - if sys.version_info[:2] >= (3, 4): - super().__init__(maxsize=maxsize, ctx=ctx) - else: - if maxsize <= 0: - # Can raise ImportError (see issues #3770 and #23400) - maxsize = SEM_VALUE_MAX - if ctx is None: - ctx = get_context() - self._maxsize = maxsize - self._reader, self._writer = connection.Pipe(duplex=False) - self._rlock = ctx.Lock() - self._opid = os.getpid() - if sys.platform == 'win32': - self._wlock = None - else: - self._wlock = ctx.Lock() - self._sem = ctx.BoundedSemaphore(maxsize) - - # For use by concurrent.futures - self._ignore_epipe = False - - self._after_fork() - - if sys.platform != 'win32': - util.register_after_fork(self, Queue._after_fork) - + super().__init__(maxsize=maxsize, ctx=ctx) self._reducers = reducers # Use custom queue set/get state to be able to reduce the custom reducers @@ -133,7 +105,7 @@ def _feed(buffer, notempty, send_bytes, writelock, close, reducers, else: wacquire = None - while 1: + while True: try: nacquire() try: @@ -142,7 +114,7 @@ def _feed(buffer, notempty, send_bytes, writelock, close, reducers, finally: nrelease() try: - while 1: + while True: obj = bpopleft() if obj 
is sentinel: util.debug('feeder thread got sentinel -- exiting') @@ -171,7 +143,7 @@ def _feed(buffer, notempty, send_bytes, writelock, close, reducers, # We ignore errors which happen after the process has # started to cleanup. if util.is_exiting(): - util.info('error in queue thread: %s', e) + util.info(f'error in queue thread: {e}') return else: queue_sem.release() @@ -185,29 +157,11 @@ def _on_queue_feeder_error(self, e, obj): import traceback traceback.print_exc() - if sys.version_info[:2] < (3, 4): - # Compat for python2.7/3.3 that use _send instead of _send_bytes - def _after_fork(self): - super(Queue, self)._after_fork() - self._send_bytes = self._writer.send_bytes - class SimpleQueue(mp_SimpleQueue): def __init__(self, reducers=None, ctx=None): - if sys.version_info[:2] >= (3, 4): - super().__init__(ctx=ctx) - else: - # Use the context to create the sync objects for python2.7/3.3 - if ctx is None: - ctx = get_context() - self._reader, self._writer = connection.Pipe(duplex=False) - self._rlock = ctx.Lock() - self._poll = self._reader.poll - if sys.platform == 'win32': - self._wlock = None - else: - self._wlock = ctx.Lock() + super().__init__(ctx=ctx) # Add possiblity to use custom reducers self._reducers = reducers @@ -226,15 +180,6 @@ def __setstate__(self, state): (self._reader, self._writer, self._reducers, self._rlock, self._wlock) = state - if sys.version_info[:2] < (3, 4): - # For python2.7/3.3, overload get to avoid creating deadlocks with - # unpickling errors. - def get(self): - with self._rlock: - res = self._reader.recv_bytes() - # unserialize the data after having released the lock - return loads(res) - # Overload put to use our customizable reducer def put(self, obj): # serialize the data before acquiring the lock diff --git a/joblib/externals/loky/backend/reduction.py b/joblib/externals/loky/backend/reduction.py index 4a2407c53..f1ee394bb 100644 --- a/joblib/externals/loky/backend/reduction.py +++ b/joblib/externals/loky/backend/reduction.py @@ -8,71 +8,28 @@ # * Add CustomizableLokyPickler to allow customizing pickling process # on the fly. # +import copyreg import io -import os -import sys import functools -from multiprocessing import util import types -try: - # Python 2 compat - from cPickle import loads as pickle_loads -except ImportError: - from pickle import loads as pickle_loads - import copyreg - -from pickle import HIGHEST_PROTOCOL - -if sys.platform == "win32": - if sys.version_info[:2] > (3, 3): - from multiprocessing.reduction import duplicate - else: - from multiprocessing.forking import duplicate +import sys +import os +from multiprocessing import util +from pickle import loads, HIGHEST_PROTOCOL ############################################################################### # Enable custom pickling in Loky. -# To allow instance customization of the pickling process, we use 2 classes. -# _ReducerRegistry gives module level customization and CustomizablePickler -# permits to use instance base custom reducers. Only CustomizablePickler -# should be used. - -class _ReducerRegistry(object): - """Registry for custom reducers. - HIGHEST_PROTOCOL is selected by default as this pickler is used - to pickle ephemeral datastructures for interprocess communication - hence no backward compatibility is required. +_dispatch_table = {} - """ - - # We override the pure Python pickler as its the only way to be able to - # customize the dispatch table without side effects in Python 2.6 - # to 3.2. 
For Python 3.3+ leverage the new dispatch_table - # feature from http://bugs.python.org/issue14166 that makes it possible - # to use the C implementation of the Pickler which is faster. - - dispatch_table = {} - - @classmethod - def register(cls, type, reduce_func): - """Attach a reducer function to a given type in the dispatch table.""" - if sys.version_info < (3,): - # Python 2 pickler dispatching is not explicitly customizable. - # Let us use a closure to workaround this limitation. - def dispatcher(cls, obj): - reduced = reduce_func(obj) - cls.save_reduce(obj=obj, *reduced) - cls.dispatch_table[type] = dispatcher - else: - cls.dispatch_table[type] = reduce_func +def register(type_, reduce_function): + _dispatch_table[type_] = reduce_function ############################################################################### # Registers extra pickling routines to improve picklization for loky -register = _ReducerRegistry.register - # make methods picklable def _reduce_method(m): @@ -157,15 +114,16 @@ def set_loky_pickler(loky_pickler=None): loky_pickler_cls = module_pickle.Pickler except (ImportError, AttributeError) as e: extra_info = ("\nThis error occurred while setting loky_pickler to" - " '{}', as required by the env variable LOKY_PICKLER" - " or the function set_loky_pickler." - .format(loky_pickler)) + f" '{loky_pickler}', as required by the env variable " + "LOKY_PICKLER or the function set_loky_pickler.") e.args = (e.args[0] + extra_info,) + e.args[1:] e.msg = e.args[0] raise e - util.debug("Using '{}' for serialization." - .format(loky_pickler if loky_pickler else "cloudpickle")) + util.debug( + f"Using '{loky_pickler if loky_pickler else 'cloudpickle'}' for " + "serialization." + ) class CustomizablePickler(loky_pickler_cls): _loky_pickler_cls = loky_pickler_cls @@ -195,43 +153,32 @@ def __init__(self, writer, reducers=None, protocol=HIGHEST_PROTOCOL): loky_pickler_cls.__init__(self, writer, protocol=protocol) if reducers is None: reducers = {} - if sys.version_info < (3,): - self.dispatch = loky_pickler_cls.dispatch.copy() - self.dispatch.update(_ReducerRegistry.dispatch_table) + + if hasattr(self, "dispatch_table"): + # Force a copy that we will update without mutating the + # any class level defined dispatch_table. + loky_dt = dict(self.dispatch_table) else: - if hasattr(self, "dispatch_table"): - # Force a copy that we will update without mutating the - # any class level defined dispatch_table. - loky_dt = dict(self.dispatch_table) - else: - # Use standard reducers as bases - loky_dt = copyreg.dispatch_table.copy() - - # Register loky specific reducers - loky_dt.update(_ReducerRegistry.dispatch_table) - - # Set the new dispatch table, taking care of the fact that we - # need to use the member_descriptor when we inherit from a - # subclass of the C implementation of the Pickler base class - # with an class level dispatch_table attribute. - self._set_dispatch_table(loky_dt) - - # Register custom reducers + # Use standard reducers as bases + loky_dt = copyreg.dispatch_table.copy() + + # Register loky specific reducers + loky_dt.update(_dispatch_table) + + # Set the new dispatch table, taking care of the fact that we + # need to use the member_descriptor when we inherit from a + # subclass of the C implementation of the Pickler base class + # with an class level dispatch_table attribute. 
+ self._set_dispatch_table(loky_dt) + + # Register the reducers for type, reduce_func in reducers.items(): self.register(type, reduce_func) def register(self, type, reduce_func): """Attach a reducer function to a given type in the dispatch table. """ - if sys.version_info < (3,): - # Python 2 pickler dispatching is not explicitly customizable. - # Let us use a closure to workaround this limitation. - def dispatcher(self, obj): - reduced = reduce_func(obj) - self.save_reduce(obj=obj, *reduced) - self.dispatch[type] = dispatcher - else: - self.dispatch_table[type] = reduce_func + self.dispatch_table[type] = reduce_func _LokyPickler = CustomizablePickler _loky_pickler_name = loky_pickler @@ -251,13 +198,6 @@ def get_loky_pickler(): set_loky_pickler() -def loads(buf): - # Compat for python2.7 version - if sys.version_info < (3, 3) and isinstance(buf, io.BytesIO): - buf = buf.getvalue() - return pickle_loads(buf) - - def dump(obj, file, reducers=None, protocol=None): '''Replacement for pickle.dump() using _LokyPickler.''' global _LokyPickler @@ -269,12 +209,11 @@ def dumps(obj, reducers=None, protocol=None): buf = io.BytesIO() dump(obj, buf, reducers=reducers, protocol=protocol) - if sys.version_info < (3, 3): - return buf.getvalue() return buf.getbuffer() __all__ = ["dump", "dumps", "loads", "register", "set_loky_pickler"] if sys.platform == "win32": + from multiprocessing.reduction import duplicate __all__ += ["duplicate"] diff --git a/joblib/externals/loky/backend/resource_tracker.py b/joblib/externals/loky/backend/resource_tracker.py index 95dff35d0..d84504e14 100644 --- a/joblib/externals/loky/backend/resource_tracker.py +++ b/joblib/externals/loky/backend/resource_tracker.py @@ -6,9 +6,13 @@ # # adapted from multiprocessing/semaphore_tracker.py (17/02/2017) # * include custom spawnv_passfds to start the process -# * use custom unlink from our own SemLock implementation # * add some VERBOSE logging # +# TODO: multiprocessing.resource_tracker was contributed to Python 3.8 so +# once loky drops support for Python 3.7 it might be possible to stop +# maintaining this loky-specific fork. As a consequence, it might also be +# possible to stop maintaining the loky.backend.synchronize fork of +# multiprocessing.synchronize. # # On Unix we run a server process which keeps track of unlinked @@ -45,23 +49,16 @@ import signal import warnings import threading +from _multiprocessing import sem_unlink +from multiprocessing import util from . import spawn -from multiprocessing import util if sys.platform == "win32": - from .compat_win32 import _winapi - from .reduction import duplicate + import _winapi import msvcrt + from multiprocessing.reduction import duplicate -try: - from _multiprocessing import sem_unlink -except ImportError: - from .semlock import sem_unlink - -if sys.version_info < (3,): - BrokenPipeError = OSError - from os import fdopen as open __all__ = ['ensure_running', 'register', 'unregister'] @@ -80,7 +77,7 @@ VERBOSE = False -class ResourceTracker(object): +class ResourceTracker: def __init__(self): self._lock = threading.Lock() @@ -133,22 +130,13 @@ def ensure_running(self): os.close(r) r = _r - cmd = 'from {} import main; main({}, {})'.format( - main.__module__, r, VERBOSE) + cmd = f'from {main.__module__} import main; main({r}, {VERBOSE})' try: fds_to_pass.append(r) # process will out live us, so no need to wait on pid exe = spawn.get_executable() - args = [exe] + util._args_from_interpreter_flags() - # In python 3.3, there is a bug which put `-RRRRR..` instead of - # `-R` in args. 
Replace it to get the correct flags. - # See https://github.com/python/cpython/blob/3.3/Lib/subprocess.py#L488 - if sys.version_info[:2] <= (3, 3): - import re - for i in range(1, len(args)): - args[i] = re.sub("-R+", "-R", args[i]) - args += ['-c', cmd] - util.debug("launching resource tracker: {}".format(args)) + args = [exe, *util._args_from_interpreter_flags(), '-c', cmd] + util.debug(f"launching resource tracker: {args}") # bpo-33613: Register a signal mask that will block the # signals. This signal mask will be inherited by the child # that is going to be spawned and will protect the child from a @@ -201,11 +189,11 @@ def maybe_unlink(self, name, rtype): self._send("MAYBE_UNLINK", name, rtype) def _send(self, cmd, name, rtype): - msg = '{0}:{1}:{2}\n'.format(cmd, name, rtype).encode('ascii') if len(name) > 512: # posix guarantees that writes to a pipe of less than PIPE_BUF # bytes are atomic, and that PIPE_BUF >= 512 raise ValueError('name too long') + msg = f'{cmd}:{name}:{rtype}\n'.encode('ascii') nbytes = os.write(self._fd, msg) assert nbytes == len(msg) @@ -239,7 +227,7 @@ def main(fd, verbose=0): if verbose: util.debug("Main resource tracker is running") - registry = {rtype: dict() for rtype in _CLEANUP_FUNCS.keys()} + registry = {rtype: {} for rtype in _CLEANUP_FUNCS.keys()} try: # keep track of registered/unregistered resources if sys.platform == "win32": @@ -261,10 +249,11 @@ def main(fd, verbose=0): if rtype not in _CLEANUP_FUNCS: raise ValueError( - 'Cannot register {} for automatic cleanup: ' - 'unknown resource type ({}). Resource type should ' - 'be one of the following: {}'.format( - name, rtype, list(_CLEANUP_FUNCS.keys()))) + f'Cannot register {name} for automatic cleanup: ' + f'unknown resource type ({rtype}). Resource type ' + 'should be one of the following: ' + f'{list(_CLEANUP_FUNCS.keys())}' + ) if cmd == 'REGISTER': if name not in registry[rtype]: @@ -274,37 +263,40 @@ def main(fd, verbose=0): if verbose: util.debug( - "[ResourceTracker] incremented refcount of {} " - "{} (current {})".format( - rtype, name, registry[rtype][name])) + "[ResourceTracker] incremented refcount of " + f"{rtype} {name} " + f"(current {registry[rtype][name]})" + ) elif cmd == 'UNREGISTER': del registry[rtype][name] if verbose: util.debug( - "[ResourceTracker] unregister {} {}: " - "registry({})".format(name, rtype, len(registry))) + f"[ResourceTracker] unregister {name} {rtype}: " + f"registry({len(registry)})" + ) elif cmd == 'MAYBE_UNLINK': registry[rtype][name] -= 1 if verbose: util.debug( - "[ResourceTracker] decremented refcount of {} " - "{} (current {})".format( - rtype, name, registry[rtype][name])) + "[ResourceTracker] decremented refcount of " + f"{rtype} {name} " + f"(current {registry[rtype][name]})" + ) if registry[rtype][name] == 0: del registry[rtype][name] try: if verbose: util.debug( - "[ResourceTracker] unlink {}" - .format(name)) + f"[ResourceTracker] unlink {name}" + ) _CLEANUP_FUNCS[rtype](name) except Exception as e: warnings.warn( - 'resource_tracker: %s: %r' % (name, e)) + f'resource_tracker: {name}: {e!r}') else: - raise RuntimeError('unrecognized command %r' % cmd) + raise RuntimeError(f'unrecognized command {cmd!r}') except BaseException: try: sys.excepthook(*sys.exc_info()) @@ -315,9 +307,11 @@ def main(fd, verbose=0): def _unlink_resources(rtype_registry, rtype): if rtype_registry: try: - warnings.warn('resource_tracker: There appear to be %d ' - 'leaked %s objects to clean up at shutdown' % - (len(rtype_registry), rtype)) + warnings.warn( + 
'resource_tracker: There appear to be ' + f'{len(rtype_registry)} leaked {rtype} objects to ' + 'clean up at shutdown' + ) except Exception: pass for name in rtype_registry: @@ -327,10 +321,9 @@ def _unlink_resources(rtype_registry, rtype): try: _CLEANUP_FUNCS[rtype](name) if verbose: - util.debug("[ResourceTracker] unlink {}" - .format(name)) + util.debug(f"[ResourceTracker] unlink {name}") except Exception as e: - warnings.warn('resource_tracker: %s: %r' % (name, e)) + warnings.warn(f'resource_tracker: {name}: {e!r}') for rtype, rtype_registry in registry.items(): if rtype == "folder": @@ -361,18 +354,16 @@ def spawnv_passfds(path, args, passfds): errpipe_read, errpipe_write = os.pipe() try: from .reduction import _mk_inheritable - _pass = [] - for fd in passfds: - _pass += [_mk_inheritable(fd)] from .fork_exec import fork_exec + _pass = [_mk_inheritable(fd) for fd in passfds] return fork_exec(args, _pass) finally: os.close(errpipe_read) os.close(errpipe_write) else: - cmd = ' '.join('"%s"' % x for x in args) + cmd = ' '.join(f'"{x}"' for x in args) try: - hp, ht, pid, tid = _winapi.CreateProcess( + _, ht, pid, _ = _winapi.CreateProcess( path, cmd, None, None, True, 0, None, None, None) _winapi.CloseHandle(ht) except BaseException: diff --git a/joblib/externals/loky/backend/semlock.py b/joblib/externals/loky/backend/semlock.py deleted file mode 100644 index 2d35f6a27..000000000 --- a/joblib/externals/loky/backend/semlock.py +++ /dev/null @@ -1,274 +0,0 @@ -############################################################################### -# Ctypes implementation for posix semaphore. -# -# author: Thomas Moreau and Olivier Grisel -# -# adapted from cpython/Modules/_multiprocessing/semaphore.c (17/02/2017) -# * use ctypes to access pthread semaphores and provide a full python -# semaphore management. -# * For OSX, as no sem_getvalue is not implemented, Semaphore with value > 1 -# are not guaranteed to work. -# * Only work with LokyProcess on posix -# -import os -import sys -import time -import errno -import ctypes -import tempfile -import threading -from ctypes.util import find_library - -# As we need to use ctypes return types for semlock object, failure value -# needs to be cast to proper python value. 
Unix failure convention is to -# return 0, whereas OSX returns -1 -SEM_FAILURE = ctypes.c_void_p(0).value -if sys.platform == 'darwin': - SEM_FAILURE = ctypes.c_void_p(-1).value - -# Semaphore types -RECURSIVE_MUTEX = 0 -SEMAPHORE = 1 - -# Semaphore constants -SEM_OFLAG = ctypes.c_int(os.O_CREAT | os.O_EXCL) -SEM_PERM = ctypes.c_int(384) - - -class timespec(ctypes.Structure): - _fields_ = [("tv_sec", ctypes.c_long), ("tv_nsec", ctypes.c_long)] - - -if sys.platform != 'win32': - pthread = ctypes.CDLL(find_library('pthread'), use_errno=True) - pthread.sem_open.restype = ctypes.c_void_p - pthread.sem_close.argtypes = [ctypes.c_void_p] - pthread.sem_wait.argtypes = [ctypes.c_void_p] - pthread.sem_trywait.argtypes = [ctypes.c_void_p] - pthread.sem_post.argtypes = [ctypes.c_void_p] - pthread.sem_getvalue.argtypes = [ctypes.c_void_p, ctypes.c_void_p] - pthread.sem_unlink.argtypes = [ctypes.c_char_p] - if sys.platform != "darwin": - pthread.sem_timedwait.argtypes = [ctypes.c_void_p, - ctypes.POINTER(timespec)] - -try: - from threading import get_ident -except ImportError: - def get_ident(): - return threading.current_thread().ident - - -if sys.version_info[:2] < (3, 3): - class FileExistsError(OSError): - pass - - class FileNotFoundError(OSError): - pass - - -def sem_unlink(name): - if pthread.sem_unlink(name.encode('ascii')) < 0: - raiseFromErrno() - - -def _sem_open(name, value=None): - """ Construct or retrieve a semaphore with the given name - - If value is None, try to retrieve an existing named semaphore. - Else create a new semaphore with the given value - """ - if value is None: - handle = pthread.sem_open(ctypes.c_char_p(name), 0) - else: - handle = pthread.sem_open(ctypes.c_char_p(name), SEM_OFLAG, SEM_PERM, - ctypes.c_int(value)) - - if handle == SEM_FAILURE: - e = ctypes.get_errno() - if e == errno.EEXIST: - raise FileExistsError("a semaphore named %s already exists" % name) - elif e == errno.ENOENT: - raise FileNotFoundError('cannot find semaphore named %s' % name) - elif e == errno.ENOSYS: - raise NotImplementedError('No semaphore implementation on this ' - 'system') - else: - raiseFromErrno() - - return handle - - -def _sem_timedwait(handle, timeout): - t_start = time.time() - if sys.platform != "darwin": - sec = int(timeout) - tv_sec = int(t_start) - nsec = int(1e9 * (timeout - sec) + .5) - tv_nsec = int(1e9 * (t_start - tv_sec) + .5) - deadline = timespec(sec+tv_sec, nsec+tv_nsec) - deadline.tv_sec += int(deadline.tv_nsec / 1000000000) - deadline.tv_nsec %= 1000000000 - return pthread.sem_timedwait(handle, ctypes.pointer(deadline)) - - # PERFORMANCE WARNING - # No sem_timedwait on OSX so we implement our own method. 
This method can - # degrade performances has the wait can have a latency up to 20 msecs - deadline = t_start + timeout - delay = 0 - now = time.time() - while True: - # Poll the sem file - res = pthread.sem_trywait(handle) - if res == 0: - return 0 - else: - e = ctypes.get_errno() - if e != errno.EAGAIN: - raiseFromErrno() - - # check for timeout - now = time.time() - if now > deadline: - ctypes.set_errno(errno.ETIMEDOUT) - return -1 - - # calculate how much time left and check the delay is not too long - # -- maximum is 20 msecs - difference = (deadline - now) - delay = min(delay, 20e-3, difference) - - # Sleep and increase delay - time.sleep(delay) - delay += 1e-3 - - -class SemLock(object): - """ctypes wrapper to the unix semaphore""" - - _rand = tempfile._RandomNameSequence() - - def __init__(self, kind, value, maxvalue, name=None, unlink_now=False): - self.count = 0 - self.ident = 0 - self.kind = kind - self.maxvalue = maxvalue - self.name = name - self.handle = _sem_open(self.name.encode('ascii'), value) - - def __del__(self): - try: - res = pthread.sem_close(self.handle) - assert res == 0, "Issue while closing semaphores" - except AttributeError: - pass - - def _is_mine(self): - return self.count > 0 and get_ident() == self.ident - - def acquire(self, block=True, timeout=None): - if self.kind == RECURSIVE_MUTEX and self._is_mine(): - self.count += 1 - return True - - if block and timeout is None: - res = pthread.sem_wait(self.handle) - elif not block or timeout <= 0: - res = pthread.sem_trywait(self.handle) - else: - res = _sem_timedwait(self.handle, timeout) - if res < 0: - e = ctypes.get_errno() - if e == errno.EINTR: - return None - elif e in [errno.EAGAIN, errno.ETIMEDOUT]: - return False - raiseFromErrno() - self.count += 1 - self.ident = get_ident() - return True - - def release(self): - if self.kind == RECURSIVE_MUTEX: - assert self._is_mine(), ( - "attempt to release recursive lock not owned by thread") - if self.count > 1: - self.count -= 1 - return - assert self.count == 1 - else: - if sys.platform == 'darwin': - # Handle broken get_value for mac ==> only Lock will work - # as sem_get_value do not work properly - if self.maxvalue == 1: - if pthread.sem_trywait(self.handle) < 0: - e = ctypes.get_errno() - if e != errno.EAGAIN: - raise OSError(e, errno.errorcode[e]) - else: - if pthread.sem_post(self.handle) < 0: - raiseFromErrno() - else: - raise ValueError( - "semaphore or lock released too many times") - else: - import warnings - warnings.warn("semaphore are broken on OSX, release might " - "increase its maximal value", RuntimeWarning) - else: - value = self._get_value() - if value >= self.maxvalue: - raise ValueError( - "semaphore or lock released too many times") - - if pthread.sem_post(self.handle) < 0: - raiseFromErrno() - - self.count -= 1 - - def _get_value(self): - value = ctypes.pointer(ctypes.c_int(-1)) - if pthread.sem_getvalue(self.handle, value) < 0: - raiseFromErrno() - return value.contents.value - - def _count(self): - return self.count - - def _is_zero(self): - if sys.platform == 'darwin': - # Handle broken get_value for mac ==> only Lock will work - # as sem_get_value do not work properly - if pthread.sem_trywait(self.handle) < 0: - e = ctypes.get_errno() - if e == errno.EAGAIN: - return True - raise OSError(e, errno.errorcode[e]) - else: - if pthread.sem_post(self.handle) < 0: - raiseFromErrno() - return False - else: - value = ctypes.pointer(ctypes.c_int(-1)) - if pthread.sem_getvalue(self.handle, value) < 0: - raiseFromErrno() - return 
value.contents.value == 0 - - def _after_fork(self): - self.count = 0 - - @staticmethod - def _rebuild(handle, kind, maxvalue, name): - self = SemLock.__new__(SemLock) - self.count = 0 - self.ident = 0 - self.kind = kind - self.maxvalue = maxvalue - self.name = name - self.handle = _sem_open(name.encode('ascii')) - return self - - -def raiseFromErrno(): - e = ctypes.get_errno() - raise OSError(e, errno.errorcode[e]) diff --git a/joblib/externals/loky/backend/spawn.py b/joblib/externals/loky/backend/spawn.py index 2a16c844b..3a9cc2dd1 100644 --- a/joblib/externals/loky/backend/spawn.py +++ b/joblib/externals/loky/backend/spawn.py @@ -18,7 +18,7 @@ WINSERVICE = False else: import msvcrt - from .reduction import duplicate + from multiprocessing.reduction import duplicate WINEXE = (sys.platform == 'win32' and getattr(sys, 'frozen', False)) WINSERVICE = sys.executable.lower().endswith("pythonservice.exe") @@ -65,19 +65,12 @@ def get_preparation_data(name, init_main_module=True): ) # Send sys_path and make sure the current directory will not be changed - sys_path = [p for p in sys.path] - try: - i = sys_path.index('') - except ValueError: - pass - else: - sys_path[i] = process.ORIGINAL_DIR - d['sys_path'] = sys_path + d['sys_path'] = [p if p != '' else process.ORIGINAL_DIR for p in sys.path] # Make sure to pass the information if the multiprocessing logger is active if util._logger is not None: d['log_level'] = util._logger.getEffectiveLevel() - if len(util._logger.handlers) > 0: + if util._logger.handlers: h = util._logger.handlers[0] d['log_fmt'] = h.formatter._fmt @@ -129,8 +122,6 @@ def get_preparation_data(name, init_main_module=True): process.ORIGINAL_DIR is not None): main_path = os.path.join(process.ORIGINAL_DIR, main_path) d['init_main_from_path'] = os.path.normpath(main_path) - # Compat for python2.7 - d['main_path'] = d['init_main_from_path'] return d @@ -249,10 +240,3 @@ def _fixup_main_from_path(main_path): run_name="__mp_main__") main_module.__dict__.update(main_content) sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module - - -def import_main_path(main_path): - ''' - Set sys.modules['__main__'] to module at main_path - ''' - _fixup_main_from_path(main_path) diff --git a/joblib/externals/loky/backend/synchronize.py b/joblib/externals/loky/backend/synchronize.py index 592de3c02..a9518a880 100644 --- a/joblib/externals/loky/backend/synchronize.py +++ b/joblib/externals/loky/backend/synchronize.py @@ -5,10 +5,12 @@ # # adapted from multiprocessing/synchronize.py (17/02/2017) # * Remove ctx argument for compatibility reason -# * Implementation of Condition/Event are necessary for compatibility -# with python2.7/3.3, Barrier should be reimplemented to for those -# version (but it is not used in loky). +# * Registers a cleanup function with the loky resource_tracker to remove the +# semaphore when the process dies instead. # +# TODO: investigate which Python version is required to be able to use +# multiprocessing.resource_tracker and therefore multiprocessing.synchronize +# instead of a loky-specific fork. import os import sys @@ -16,11 +18,10 @@ import threading import _multiprocessing from time import time as _time +from multiprocessing import process, util +from multiprocessing.context import assert_spawning -from .context import assert_spawning from . 
import resource_tracker -from multiprocessing import process -from multiprocessing import util __all__ = [ 'Lock', 'RLock', 'Semaphore', 'BoundedSemaphore', 'Condition', 'Event' @@ -29,26 +30,19 @@ # raise ImportError for platforms lacking a working sem_open implementation. # See issue 3770 try: - if sys.version_info < (3, 4): - from .semlock import SemLock as _SemLock - from .semlock import sem_unlink - else: - from _multiprocessing import SemLock as _SemLock - from _multiprocessing import sem_unlink -except (ImportError): + from _multiprocessing import SemLock as _SemLock + from _multiprocessing import sem_unlink +except ImportError: raise ImportError("This platform lacks a functioning sem_open" + " implementation, therefore, the required" + " synchronization primitives needed will not" + " function, see issue 3770.") -if sys.version_info[:2] < (3, 3): - FileExistsError = OSError - # # Constants # -RECURSIVE_MUTEX, SEMAPHORE = list(range(2)) +RECURSIVE_MUTEX, SEMAPHORE = range(2) SEM_VALUE_MAX = _multiprocessing.SemLock.SEM_VALUE_MAX @@ -56,27 +50,35 @@ # Base class for semaphores and mutexes; wraps `_multiprocessing.SemLock` # -class SemLock(object): +class SemLock: _rand = tempfile._RandomNameSequence() - def __init__(self, kind, value, maxvalue): + def __init__(self, kind, value, maxvalue, name=None): # unlink_now is only used on win32 or when we are using fork. unlink_now = False - for i in range(100): - try: - self._semlock = _SemLock( - kind, value, maxvalue, SemLock._make_name(), - unlink_now) - except FileExistsError: # pragma: no cover - pass - else: - break - else: # pragma: no cover - raise FileExistsError('cannot find name for semaphore') - - util.debug('created semlock with handle %s and name "%s"' - % (self._semlock.handle, self._semlock.name)) + if name is None: + # Try to find an unused name for the SemLock instance. + for _ in range(100): + try: + self._semlock = _SemLock( + kind, value, maxvalue, SemLock._make_name(), unlink_now + ) + except FileExistsError: # pragma: no cover + pass + else: + break + else: # pragma: no cover + raise FileExistsError('cannot find name for semaphore') + else: + self._semlock = _SemLock( + kind, value, maxvalue, name, unlink_now + ) + self.name = name + util.debug( + f'created semlock with handle {self._semlock.handle} and name ' + f'"{self.name}"' + ) self._make_methods() @@ -93,8 +95,14 @@ def _after_fork(obj): @staticmethod def _cleanup(name): - sem_unlink(name) - resource_tracker.unregister(name, "semlock") + try: + sem_unlink(name) + except FileNotFoundError: + # Already unlinked, possibly by user code: ignore and make sure to + # unregister the semaphore from the resource tracker. 
+ pass + finally: + resource_tracker.unregister(name, "semlock") def _make_methods(self): self.acquire = self._semlock.acquire @@ -114,14 +122,15 @@ def __getstate__(self): def __setstate__(self, state): self._semlock = _SemLock._rebuild(*state) - util.debug('recreated blocker with handle %r and name "%s"' - % (state[0], state[3])) + util.debug( + f'recreated blocker with handle {state[0]!r} and name "{state[3]}"' + ) self._make_methods() @staticmethod def _make_name(): # OSX does not support long names for semaphores - return '/loky-%i-%s' % (os.getpid(), next(SemLock._rand)) + return f'/loky-{os.getpid()}-{next(SemLock._rand)}' # @@ -143,7 +152,7 @@ def __repr__(self): value = self._semlock._get_value() except Exception: value = 'unknown' - return '<%s(value=%s)>' % (self.__class__.__name__, value) + return f'<{self.__class__.__name__}(value={value})>' # @@ -160,8 +169,10 @@ def __repr__(self): value = self._semlock._get_value() except Exception: value = 'unknown' - return '<%s(value=%s, maxvalue=%s)>' % \ - (self.__class__.__name__, value, self._semlock.maxvalue) + return ( + f'<{self.__class__.__name__}(value={value}, ' + f'maxvalue={self._semlock.maxvalue})>' + ) # @@ -171,14 +182,14 @@ def __repr__(self): class Lock(SemLock): def __init__(self): - super(Lock, self).__init__(SEMAPHORE, 1, 1) + super().__init__(SEMAPHORE, 1, 1) def __repr__(self): try: if self._semlock._is_mine(): name = process.current_process().name if threading.current_thread().name != 'MainThread': - name += '|' + threading.current_thread().name + name = f'{name}|{threading.current_thread().name}' elif self._semlock._get_value() == 1: name = 'None' elif self._semlock._count() > 0: @@ -187,7 +198,7 @@ def __repr__(self): name = 'SomeOtherProcess' except Exception: name = 'unknown' - return '<%s(owner=%s)>' % (self.__class__.__name__, name) + return f'<{self.__class__.__name__}(owner={name})>' # @@ -197,14 +208,14 @@ def __repr__(self): class RLock(SemLock): def __init__(self): - super(RLock, self).__init__(RECURSIVE_MUTEX, 1, 1) + super().__init__(RECURSIVE_MUTEX, 1, 1) def __repr__(self): try: if self._semlock._is_mine(): name = process.current_process().name if threading.current_thread().name != 'MainThread': - name += '|' + threading.current_thread().name + name = f'{name}|{threading.current_thread().name}' count = self._semlock._count() elif self._semlock._get_value() == 1: name, count = 'None', 0 @@ -214,14 +225,14 @@ def __repr__(self): name, count = 'SomeOtherProcess', 'nonzero' except Exception: name, count = 'unknown', 'unknown' - return '<%s(%s, %s)>' % (self.__class__.__name__, name, count) + return f'<{self.__class__.__name__}({name}, {count})>' # # Condition variable # -class Condition(object): +class Condition: def __init__(self, lock=None): self._lock = lock or RLock() @@ -256,8 +267,7 @@ def __repr__(self): self._woken_count._semlock._get_value()) except Exception: num_waiters = 'unknown' - return '<%s(%s, %s)>' % (self.__class__.__name__, - self._lock, num_waiters) + return f'<{self.__class__.__name__}({self._lock}, {num_waiters})>' def wait(self, timeout=None): assert self._lock._semlock._is_mine(), \ @@ -268,7 +278,7 @@ def wait(self, timeout=None): # release lock count = self._lock._semlock._count() - for i in range(count): + for _ in range(count): self._lock.release() try: @@ -279,7 +289,7 @@ def wait(self, timeout=None): self._woken_count.release() # reacquire lock - for i in range(count): + for _ in range(count): self._lock.acquire() def notify(self): @@ -315,7 +325,7 @@ def notify_all(self): 
sleepers += 1 if sleepers: - for i in range(sleepers): + for _ in range(sleepers): self._woken_count.acquire() # wait for a sleeper to wake # rezero wait_semaphore in case some timeouts just happened @@ -345,7 +355,7 @@ def wait_for(self, predicate, timeout=None): # Event # -class Event(object): +class Event: def __init__(self): self._cond = Condition(Lock()) diff --git a/joblib/externals/loky/backend/utils.py b/joblib/externals/loky/backend/utils.py index dc1b82af2..2956614e4 100644 --- a/joblib/externals/loky/backend/utils.py +++ b/joblib/externals/loky/backend/utils.py @@ -4,116 +4,125 @@ import errno import signal import warnings -import threading import subprocess +import traceback try: import psutil except ImportError: psutil = None -WIN32 = sys.platform == "win32" - - -def _flag_current_thread_clean_exit(): - """Put a ``_clean_exit`` flag on the current thread""" - thread = threading.current_thread() - thread._clean_exit = True +def kill_process_tree(process, use_psutil=True): + """Terminate process and its descendants with SIGKILL""" + if use_psutil and psutil is not None: + _kill_process_tree_with_psutil(process) + else: + _kill_process_tree_without_psutil(process) def recursive_terminate(process, use_psutil=True): - if use_psutil and psutil is not None: - _recursive_terminate_with_psutil(process) - else: - _recursive_terminate_without_psutil(process) + warnings.warn( + "recursive_terminate is deprecated in loky 3.2, use kill_process_tree " + "instead", + DeprecationWarning, + ) + kill_process_tree(process, use_psutil=use_psutil) -def _recursive_terminate_with_psutil(process, retries=5): +def _kill_process_tree_with_psutil(process): try: - children = psutil.Process(process.pid).children(recursive=True) + descendants = psutil.Process(process.pid).children(recursive=True) except psutil.NoSuchProcess: return - # Kill the children in reverse order to avoid killing the parents before - # the children in cases where there are more processes nested. - for child in children[::-1]: + # Kill the descendants in reverse order to avoid killing the parents before + # the descendants in cases where there are more processes nested. + for descendant in descendants[::-1]: try: - child.kill() + descendant.kill() except psutil.NoSuchProcess: pass - process.terminate() + try: + psutil.Process(process.pid).kill() + except psutil.NoSuchProcess: + pass process.join() -def _recursive_terminate_without_psutil(process): - """Terminate a process and its descendants. - """ +def _kill_process_tree_without_psutil(process): + """Terminate a process and its descendants.""" try: - _recursive_terminate(process.pid) - except OSError as e: - warnings.warn("Failed to kill subprocesses on this platform. Please" - "install psutil: https://github.com/giampaolo/psutil") - # In case we cannot introspect the children, we fall back to the - # classic Process.terminate. - process.terminate() + if sys.platform == "win32": + _windows_taskkill_process_tree(process.pid) + else: + _posix_recursive_kill(process.pid) + except Exception: # pragma: no cover + details = traceback.format_exc() + warnings.warn( + "Failed to kill subprocesses on this platform. Please install " + "psutil: https://github.com/giampaolo/psutil\n" + f"Details:\n{details}" + ) + # In case we cannot introspect or kill the descendants, we fall back to + # only killing the main process. + # + # Note: on Windows, process.kill() is an alias for process.terminate() + # which in turn calls the Win32 API function TerminateProcess().
+ process.kill() process.join() -def _recursive_terminate(pid): - """Recursively kill the descendants of a process before killing it. - """ +def _windows_taskkill_process_tree(pid): + # On windows, the taskkill function with option `/T` terminate a given + # process pid and its children. + try: + subprocess.check_output( + ["taskkill", "/F", "/T", "/PID", str(pid)], stderr=None + ) + except subprocess.CalledProcessError as e: + # In Windows, taskkill returns 128, 255 for no process found. + if e.returncode not in [128, 255]: + # Let's raise to let the caller log the error details in a + # warning and only kill the root process. + raise # pragma: no cover + + +def _kill(pid): + # Not all systems (e.g. Windows) have a SIGKILL, but the C specification + # mandates a SIGTERM signal. While Windows is handled specifically above, + # let's try to be safe for other hypothetic platforms that only have + # SIGTERM without SIGKILL. + kill_signal = getattr(signal, 'SIGKILL', signal.SIGTERM) + try: + os.kill(pid, kill_signal) + except OSError as e: + # if OSError is raised with [Errno 3] no such process, the process + # is already terminated, else, raise the error and let the top + # level function raise a warning and retry to kill the process. + if e.errno != errno.ESRCH: + raise # pragma: no cover - if sys.platform == "win32": - # On windows, the taskkill function with option `/T` terminate a given - # process pid and its children. - try: - subprocess.check_output( - ["taskkill", "/F", "/T", "/PID", str(pid)], - stderr=None) - except subprocess.CalledProcessError as e: - # In windows, taskkill return 1 for permission denied and 128, 255 - # for no process found. - if e.returncode not in [1, 128, 255]: - raise - elif e.returncode == 1: - # Try to kill the process without its descendants if taskkill - # was denied permission. If this fails too, with an error - # different from process not found, let the top level function - # raise a warning and retry to kill the process. - try: - os.kill(pid, signal.SIGTERM) - except OSError as e: - if e.errno != errno.ESRCH: - raise - else: - try: - children_pids = subprocess.check_output( - ["pgrep", "-P", str(pid)], - stderr=None - ) - except subprocess.CalledProcessError as e: - # `ps` returns 1 when no child process has been found - if e.returncode == 1: - children_pids = b'' - else: - raise - - # Decode the result, split the cpid and remove the trailing line - children_pids = children_pids.decode().split('\n')[:-1] - for cpid in children_pids: - cpid = int(cpid) - _recursive_terminate(cpid) +def _posix_recursive_kill(pid): + """Recursively kill the descendants of a process before killing it.""" + try: + children_pids = subprocess.check_output( + ["pgrep", "-P", str(pid)], stderr=None, text=True + ) + except subprocess.CalledProcessError as e: + # `ps` returns 1 when no child process has been found + if e.returncode == 1: + children_pids = '' + else: + raise # pragma: no cover - try: - os.kill(pid, signal.SIGTERM) - except OSError as e: - # if OSError is raised with [Errno 3] no such process, the process - # is already terminated, else, raise the error and let the top - # level function raise a warning and retry to kill the process. 
- if e.errno != errno.ESRCH: - raise + # Decode the result, split the cpid and remove the trailing line + for cpid in children_pids.splitlines(): + cpid = int(cpid) + _posix_recursive_kill(cpid) + + _kill(pid) def get_exitcodes_terminated_worker(processes): @@ -129,7 +138,7 @@ def get_exitcodes_terminated_worker(processes): # the terminated worker. exitcodes = [p.exitcode for p in list(processes.values()) if p.exitcode is not None] - while len(exitcodes) == 0 and patience > 0: + while not exitcodes and patience > 0: patience -= 1 exitcodes = [p.exitcode for p in list(processes.values()) if p.exitcode is not None] @@ -140,7 +149,7 @@ def get_exitcodes_terminated_worker(processes): def _format_exitcodes(exitcodes): """Format a list of exit code with names of the signals if possible""" - str_exitcodes = ["{}({})".format(_get_exitcode_name(e), e) + str_exitcodes = [f"{_get_exitcode_name(e)}({e})" for e in exitcodes if e is not None] return "{" + ", ".join(str_exitcodes) + "}" @@ -154,14 +163,7 @@ def _get_exitcode_name(exitcode): if exitcode < 0: try: import signal - if sys.version_info > (3, 5): - return signal.Signals(-exitcode).name - - # construct an inverse lookup table - for v, k in signal.__dict__.items(): - if (v.startswith('SIG') and not v.startswith('SIG_') and - k == -exitcode): - return v + return signal.Signals(-exitcode).name except ValueError: return "UNKNOWN" elif exitcode != 255: diff --git a/joblib/externals/loky/cloudpickle_wrapper.py b/joblib/externals/loky/cloudpickle_wrapper.py index 1bf41a336..0b187e84e 100644 --- a/joblib/externals/loky/cloudpickle_wrapper.py +++ b/joblib/externals/loky/cloudpickle_wrapper.py @@ -1,17 +1,12 @@ import inspect from functools import partial +from joblib.externals.cloudpickle import dumps, loads -try: - from joblib.externals.cloudpickle import dumps, loads - cloudpickle = True -except ImportError: - cloudpickle = False +WRAP_CACHE = {} -WRAP_CACHE = dict() - -class CloudpickledObjectWrapper(object): +class CloudpickledObjectWrapper: def __init__(self, obj, keep_wrapper=False): self._obj = obj self._keep_wrapper = keep_wrapper @@ -52,9 +47,6 @@ def _reconstruct_wrapper(_pickled_object, keep_wrapper): def _wrap_objects_when_needed(obj): # Function to introspect an object and decide if it should be wrapped or # not. - if not cloudpickle: - return obj - need_wrap = "__main__" in getattr(obj, "__module__", "") if isinstance(obj, partial): return partial( @@ -92,11 +84,6 @@ def wrap_non_picklable_objects(obj, keep_wrapper=True): objects in the main scripts and to implement __reduce__ functions for complex classes. """ - if not cloudpickle: - raise ImportError("could not from joblib.externals import cloudpickle. Please install " - "cloudpickle to allow extended serialization. 
" - "(`pip install cloudpickle`).") - # If obj is a class, create a CloudpickledClassWrapper which instantiates # the object internally and wrap it directly in a CloudpickledObjectWrapper if inspect.isclass(obj): diff --git a/joblib/externals/loky/initializers.py b/joblib/externals/loky/initializers.py new file mode 100644 index 000000000..cc4b7b17c --- /dev/null +++ b/joblib/externals/loky/initializers.py @@ -0,0 +1,76 @@ +import warnings + + +def _viztracer_init(init_kwargs): + """Initialize viztracer's profiler in worker processes""" + from viztracer import VizTracer + tracer = VizTracer(**init_kwargs) + tracer.register_exit() + tracer.start() + + +def _make_viztracer_initializer_and_initargs(): + try: + import viztracer + tracer = viztracer.get_tracer() + if tracer is not None and getattr(tracer, 'enable', False): + # Profiler is active: introspect its configuration to + # initialize the workers with the same configuration. + return _viztracer_init, (tracer.init_kwargs,) + except ImportError: + # viztracer is not installed: nothing to do + pass + except Exception as e: + # In case viztracer's API evolve, we do not want to crash loky but + # we want to know about it to be able to update loky. + warnings.warn(f"Unable to introspect viztracer state: {e}") + return None, () + + +class _ChainedInitializer: + """Compound worker initializer + + This is meant to be used in conjunction with _chain_initializers to + produce the necessary chained_args list to be passed to __call__. + """ + + def __init__(self, initializers): + self._initializers = initializers + + def __call__(self, *chained_args): + for initializer, args in zip(self._initializers, chained_args): + initializer(*args) + + +def _chain_initializers(initializer_and_args): + """Convenience helper to combine a sequence of initializers. + + If some initializers are None, they are filtered out. 
+ """ + filtered_initializers = [] + filtered_initargs = [] + for initializer, initargs in initializer_and_args: + if initializer is not None: + filtered_initializers.append(initializer) + filtered_initargs.append(initargs) + + if not filtered_initializers: + return None, () + elif len(filtered_initializers) == 1: + return filtered_initializers[0], filtered_initargs[0] + else: + return _ChainedInitializer(filtered_initializers), filtered_initargs + + +def _prepare_initializer(initializer, initargs): + if initializer is not None and not callable(initializer): + raise TypeError( + f"initializer must be a callable, got: {initializer!r}" + ) + + # Introspect runtime to determine if we need to propagate the viztracer + # profiler information to the workers: + return _chain_initializers([ + (initializer, initargs), + _make_viztracer_initializer_and_initargs(), + ]) diff --git a/joblib/externals/loky/process_executor.py b/joblib/externals/loky/process_executor.py index 41e4a2b57..4e3e819ce 100644 --- a/joblib/externals/loky/process_executor.py +++ b/joblib/externals/loky/process_executor.py @@ -4,7 +4,6 @@ # author: Thomas Moreau and Olivier Grisel # # adapted from concurrent/futures/process_pool_executor.py (17/02/2017) -# * Backport for python2.7/3.3, # * Add an extra management thread to detect executor_manager_thread failures, # * Improve the shutdown process to avoid deadlocks, # * Add timeout for workers, @@ -62,36 +61,29 @@ import os import gc import sys +import queue import struct import weakref import warnings import itertools import traceback import threading -from time import time +from time import time, sleep import multiprocessing as mp from functools import partial from pickle import PicklingError +from concurrent.futures import Executor +from concurrent.futures._base import LOGGER +from concurrent.futures.process import BrokenProcessPool as _BPPException +from multiprocessing.connection import wait -from . import _base +from ._base import Future from .backend import get_context -from .backend.compat import queue -from .backend.compat import wait -from .backend.compat import set_cause from .backend.context import cpu_count from .backend.queues import Queue, SimpleQueue from .backend.reduction import set_loky_pickler, get_loky_pickler_name -from .backend.utils import recursive_terminate, get_exitcodes_terminated_worker - -try: - from concurrent.futures.process import BrokenProcessPool as _BPPException -except ImportError: - _BPPException = RuntimeError - - -# Compatibility for python2.7 -if sys.version_info[0] == 2: - ProcessLookupError = OSError +from .backend.utils import kill_process_tree, get_exitcodes_terminated_worker +from .initializers import _prepare_initializer # Mechanism to prevent infinite process spawning. When a worker of a @@ -115,7 +107,9 @@ def _get_memory_usage(pid, force_gc=False): if force_gc: gc.collect() - return Process(pid).memory_info().rss + mem_size = Process(pid).memory_info().rss + mp.util.debug(f'psutil return memory size: {mem_size}') + return mem_size except ImportError: _USE_PSUTIL = False @@ -134,12 +128,7 @@ def close(self): def wakeup(self): if not self._closed: - if sys.platform == "win32" and sys.version_info[:2] < (3, 4): - # Compat for python2.7 on windows, where poll return false for - # b"" messages. Use the slightly larger message b"0". 
- self._writer.send_bytes(b"0") - else: - self._writer.send_bytes(b"") + self._writer.send_bytes(b"") def clear(self): if not self._closed: @@ -147,7 +136,7 @@ def clear(self): self._reader.recv_bytes() -class _ExecutorFlags(object): +class _ExecutorFlags: """necessary references to maintain executor states without preventing gc It permits to keep the information needed by executor_manager_thread @@ -195,8 +184,9 @@ def _python_exit(): global _global_shutdown _global_shutdown = True items = list(_threads_wakeups.items()) - mp.util.debug("Interpreter shutting down. Waking up " - "executor_manager_thread {}".format(items)) + if len(items) > 0: + mp.util.debug("Interpreter shutting down. Waking up " + f"executor_manager_thread {items}") for _, (shutdown_lock, thread_wakeup) in items: with shutdown_lock: thread_wakeup.wakeup() @@ -224,7 +214,7 @@ class _RemoteTraceback(Exception): """Embed stringification of remote traceback in local traceback """ def __init__(self, tb=None): - self.tb = '\n"""\n{}"""'.format(tb) + self.tb = f'\n"""\n{tb}"""' def __str__(self): return self.tb @@ -246,11 +236,11 @@ def __reduce__(self): def _rebuild_exc(exc, tb): - exc = set_cause(exc, _RemoteTraceback(tb)) + exc.__cause__ = _RemoteTraceback(tb) return exc -class _WorkItem(object): +class _WorkItem: __slots__ = ["future", "fn", "args", "kwargs"] @@ -261,7 +251,7 @@ def __init__(self, future, fn, args, kwargs): self.kwargs = kwargs -class _ResultItem(object): +class _ResultItem: def __init__(self, work_id, exception=None, result=None): self.work_id = work_id @@ -269,7 +259,7 @@ def __init__(self, work_id, exception=None, result=None): self.result = result -class _CallItem(object): +class _CallItem: def __init__(self, work_id, fn, args, kwargs): self.work_id = work_id @@ -285,8 +275,9 @@ def __call__(self): return self.fn(*self.args, **self.kwargs) def __repr__(self): - return "CallItem({}, {}, {}, {})".format( - self.work_id, self.fn, self.args, self.kwargs) + return ( + f"CallItem({self.work_id}, {self.fn}, {self.args}, {self.kwargs})" + ) class _SafeQueue(Queue): @@ -296,7 +287,7 @@ def __init__(self, max_size=0, ctx=None, pending_work_items=None, self.thread_wakeup = thread_wakeup self.pending_work_items = pending_work_items self.running_work_items = running_work_items - super(_SafeQueue, self).__init__(max_size, reducers=reducers, ctx=ctx) + super().__init__(max_size, reducers=reducers, ctx=ctx) def _on_queue_feeder_error(self, e, obj): if isinstance(obj, _CallItem): @@ -310,8 +301,7 @@ def _on_queue_feeder_error(self, e, obj): "Could not pickle the task to send it to the workers.") tb = traceback.format_exception( type(e), e, getattr(e, "__traceback__", None)) - raised_error = set_cause(raised_error, - _RemoteTraceback(''.join(tb))) + raised_error.__cause__ = _RemoteTraceback(''.join(tb)) work_item = self.pending_work_items.pop(obj.work_id, None) self.running_work_items.remove(obj.work_id) # work_item can be None if another process terminated. In this @@ -322,15 +312,12 @@ def _on_queue_feeder_error(self, e, obj): del work_item self.thread_wakeup.wakeup() else: - super(_SafeQueue, self)._on_queue_feeder_error(e, obj) + super()._on_queue_feeder_error(e, obj) def _get_chunks(chunksize, *iterables): """Iterates over zip()ed iterables in chunks. 
""" - if sys.version_info < (3, 3): - it = itertools.izip(*iterables) - else: - it = zip(*iterables) + it = zip(*iterables) while True: chunk = tuple(itertools.islice(it, chunksize)) if not chunk: @@ -374,8 +361,8 @@ def _process_worker(call_queue, result_queue, initializer, initargs, to by the worker. initializer: A callable initializer, or None initargs: A tuple of args for the initializer - process_management_lock: A ctx.Lock avoiding worker timeout while some - workers are being spawned. + processes_management_lock: A ctx.Lock avoiding worker timeout while + some workers are being spawned. timeout: maximum time to wait for a new item in the call_queue. If that time is expired, the worker will shutdown. worker_exit_lock: Lock to avoid flagging the executor as broken on @@ -386,7 +373,7 @@ def _process_worker(call_queue, result_queue, initializer, initargs, try: initializer(*initargs) except BaseException: - _base.LOGGER.critical('Exception in initializer:', exc_info=True) + LOGGER.critical('Exception in initializer:', exc_info=True) # The parent will notice that the process stopped and # mark the pool broken return @@ -398,15 +385,14 @@ def _process_worker(call_queue, result_queue, initializer, initargs, _last_memory_leak_check = None pid = os.getpid() - mp.util.debug('Worker started with timeout=%s' % timeout) + mp.util.debug(f'Worker started with timeout={timeout}') while True: try: call_item = call_queue.get(block=True, timeout=timeout) if call_item is None: mp.util.info("Shutting down worker on sentinel") except queue.Empty: - mp.util.info("Shutting down worker after timeout %0.3fs" - % timeout) + mp.util.info(f"Shutting down worker after timeout {timeout:0.3f}s") if processes_management_lock.acquire(block=False): processes_management_lock.release() call_item = None @@ -421,12 +407,23 @@ def _process_worker(call_queue, result_queue, initializer, initargs, # If we cannot format correctly the exception, at least print # the traceback. print(previous_tb) + mp.util.debug('Exiting with code 1') sys.exit(1) if call_item is None: - # Notify queue management thread about clean worker shutdown + # Notify queue management thread about worker shutdown result_queue.put(pid) - with worker_exit_lock: - return + is_clean = worker_exit_lock.acquire(True, timeout=30) + + # Early notify any loky executor running in this worker process + # (nested parallelism) that this process is about to shutdown to + # avoid a deadlock waiting undifinitely for the worker to finish. 
+ _python_exit() + + if is_clean: + mp.util.debug('Exited cleanly') + else: + mp.util.info('Main process did not release worker_exit') + return try: r = call_item() except BaseException as e: @@ -467,11 +464,12 @@ def _process_worker(call_queue, result_queue, initializer, initargs, mp.util.info("Memory leak detected: shutting down worker") result_queue.put(pid) with worker_exit_lock: + mp.util.debug('Exit due to memory leak') return else: # if psutil is not installed, trigger gc.collect events # regularly to limit potential memory leaks due to reference cycles - if ((_last_memory_leak_check is None) or + if (_last_memory_leak_check is None or (time() - _last_memory_leak_check > _MEMORY_LEAK_CHECK_DELAY)): gc.collect() @@ -508,8 +506,12 @@ def __init__(self, executor): def weakref_cb(_, thread_wakeup=self.thread_wakeup, shutdown_lock=self.shutdown_lock): - mp.util.debug('Executor collected: triggering callback for' - ' QueueManager wakeup') + if mp is not None: + # At this point, the multiprocessing module can already be + # garbage collected. We only log debug info when still + # possible. + mp.util.debug('Executor collected: triggering callback for' + ' QueueManager wakeup') with shutdown_lock: thread_wakeup.wakeup() @@ -542,7 +544,7 @@ def weakref_cb(_, # of new processes or shut down self.processes_management_lock = executor._processes_management_lock - super(_ExecutorManagerThread, self).__init__() + super().__init__(name="ExecutorManagerThread") if sys.version_info < (3, 9): self.daemon = True @@ -619,7 +621,7 @@ def wait_result_broken_or_wakeup(self): "A task has failed to un-serialize. Please ensure that" " the arguments of the function are all picklable." ) - set_cause(bpe, result_item) + bpe.__cause__ = result_item else: is_broken = False except BaseException as e: @@ -630,7 +632,7 @@ def wait_result_broken_or_wakeup(self): ) tb = traceback.format_exception( type(e), e, getattr(e, "__traceback__", None)) - set_cause(bpe, _RemoteTraceback(''.join(tb))) + bpe.__cause__ = _RemoteTraceback(''.join(tb)) elif wakeup_reader in ready: # This is simply a wake-up event that might either trigger putting @@ -644,14 +646,21 @@ def wait_result_broken_or_wakeup(self): # In Windows, introspecting terminated workers exitcodes seems # unstable, therefore they are not appended in the exception # message. - exit_codes = "\nThe exit codes of the workers are {}".format( - get_exitcodes_terminated_worker(self.processes)) + exit_codes = ( + "\nThe exit codes of the workers are " + f"{get_exitcodes_terminated_worker(self.processes)}" + ) + mp.util.debug('A worker unexpectedly terminated. Workers that ' + 'might have caused the breakage: ' + + str({p.name: p.exitcode + for p in list(self.processes.values()) + if p is not None and p.sentinel in ready})) bpe = TerminatedWorkerError( "A worker process managed by the executor was unexpectedly " "terminated. This could be caused by a segmentation fault " "while calling the function or by an excessive memory usage " "causing the Operating System to kill the worker.\n" - "{}".format(exit_codes) + f"{exit_codes}" ) self.thread_wakeup.clear() @@ -669,9 +678,12 @@ def process_result_item(self, result_item): with self.processes_management_lock: p = self.processes.pop(result_item, None) - # p can be None is the executor is concurrently shutting down. + # p can be None if the executor is concurrently shutting down. 
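The exit codes embedded in the TerminatedWorkerError message above come from the exit-code helpers in utils.py; a simplified sketch of that formatting, assuming a POSIX system (Windows defines no SIGKILL):

import signal

def format_exitcode(exitcode):
    # Negative exit codes mean the worker was killed by a signal.
    if exitcode is not None and exitcode < 0:
        return f"{signal.Signals(-exitcode).name}({exitcode})"
    return f"EXIT({exitcode})"

format_exitcode(-9)   # 'SIGKILL(-9)', typical of an out-of-memory kill
format_exitcode(1)    # 'EXIT(1)'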
if p is not None: p._worker_exit_lock.release() + mp.util.debug( + f"joining {p.name} when processing {p.pid} as result_item" + ) p.join() del p @@ -690,7 +702,8 @@ def process_result_item(self, result_item): "executor. This can be caused by a too short worker " "timeout or by a memory leak.", UserWarning ) - executor._adjust_process_count() + with executor._processes_management_lock: + executor._adjust_process_count() executor = None else: # Received a _ResultItem so mark the future as completed. @@ -725,7 +738,7 @@ def terminate_broken(self, bpe): self.executor_flags.flag_as_broken(bpe) # Mark pending tasks as failed. - for work_id, work_item in self.pending_work_items.items(): + for work_item in self.pending_work_items.values(): work_item.future.set_exception(bpe) # Delete references to object. See issue16284 del work_item @@ -733,7 +746,7 @@ def terminate_broken(self, bpe): # Terminate remaining workers forcibly: the queues or their # locks may be in a dirty state and block forever. - self.kill_workers() + self.kill_workers(reason="broken executor") # clean up resources self.join_executor_internals() @@ -753,17 +766,17 @@ def flag_executor_shutting_down(self): del work_item # Kill the remaining worker forcibly to no waste time joining them - self.kill_workers() + self.kill_workers(reason="executor shutting down") - def kill_workers(self): + def kill_workers(self, reason=''): # Terminate the remaining workers using SIGKILL. This function also # terminates descendant workers of the children in case there is some # nested parallelism. while self.processes: _, p = self.processes.popitem() - mp.util.debug('terminate process {}'.format(p.name)) + mp.util.debug(f"terminate process {p.name}, reason: {reason}") try: - recursive_terminate(p) + kill_process_tree(p) except ProcessLookupError: # pragma: no cover pass @@ -777,22 +790,36 @@ def shutdown_workers(self): with self.processes_management_lock: n_children_to_stop = 0 for p in list(self.processes.values()): + mp.util.debug(f"releasing worker exit lock on {p.name}") p._worker_exit_lock.release() n_children_to_stop += 1 + mp.util.debug(f"found {n_children_to_stop} processes to stop") + # Send the right number of sentinels, to make sure all children are # properly terminated. Do it with a mechanism that avoid hanging on # Full queue when all workers have already been shutdown. n_sentinels_sent = 0 + cooldown_time = 0.001 while (n_sentinels_sent < n_children_to_stop and self.get_n_children_alive() > 0): - for i in range(n_children_to_stop - n_sentinels_sent): + for _ in range(n_children_to_stop - n_sentinels_sent): try: self.call_queue.put_nowait(None) n_sentinels_sent += 1 - except queue.Full: + except queue.Full as e: + if cooldown_time > 10.0: + raise e + mp.util.info( + "full call_queue prevented to send all sentinels at " + "once, waiting..." + ) + sleep(cooldown_time) + cooldown_time *= 2 break + mp.util.debug(f"sent {n_sentinels_sent} sentinels to the call queue") + def join_executor_internals(self): self.shutdown_workers() @@ -814,13 +841,23 @@ def join_executor_internals(self): self.thread_wakeup.close() # If .join() is not called on the created processes then - # some ctx.Queue methods may deadlock on Mac OS X. - mp.util.debug("joining processes") - for p in list(self.processes.values()): - p.join() + # some ctx.Queue methods may deadlock on macOS. 
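The shutdown path above now sleeps with an exponentially increasing cooldown when the call queue is full, instead of busy-retrying, and re-raises once the cooldown exceeds ten seconds. A simplified sketch of that pattern (illustrative, not the exact executor code):

import queue
from time import sleep

def send_sentinels(call_queue, n_to_send):
    sent, cooldown = 0, 0.001
    while sent < n_to_send:
        try:
            call_queue.put_nowait(None)
            sent += 1
        except queue.Full:
            if cooldown > 10.0:
                raise
            # Give workers some time to drain the queue, doubling each retry.
            sleep(cooldown)
            cooldown *= 2
    return sent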
+ with self.processes_management_lock: + mp.util.debug(f"joining {len(self.processes)} processes") + n_joined_processes = 0 + while True: + try: + pid, p = self.processes.popitem() + mp.util.debug(f"joining process {p.name} with pid {pid}") + p.join() + n_joined_processes += 1 + except KeyError: + break - mp.util.debug("executor management thread clean shutdown of worker " - "processes: {}".format(list(self.processes))) + mp.util.debug( + "executor management thread clean shutdown of " + f"{n_joined_processes} workers" + ) def get_n_children_alive(self): # This is an upper bound on the number of children alive. @@ -834,9 +871,8 @@ def get_n_children_alive(self): def _check_system_limits(): global _system_limits_checked, _system_limited - if _system_limits_checked: - if _system_limited: - raise NotImplementedError(_system_limited) + if _system_limits_checked and _system_limited: + raise NotImplementedError(_system_limited) _system_limits_checked = True try: nsems_max = os.sysconf("SC_SEM_NSEMS_MAX") @@ -851,8 +887,10 @@ def _check_system_limits(): # minimum number of semaphores available # according to POSIX return - _system_limited = ("system provides too few semaphores (%d available, " - "256 necessary)" % nsems_max) + _system_limited = ( + f"system provides too few semaphores ({nsems_max} available, " + "256 necessary)" + ) raise NotImplementedError(_system_limited) @@ -880,8 +918,8 @@ def _check_max_depth(context): if 0 < MAX_DEPTH and _CURRENT_DEPTH + 1 > MAX_DEPTH: raise LokyRecursionError( "Could not spawn extra nested processes at depth superior to " - "MAX_DEPTH={}. If this is intendend, you can change this limit " - "with the LOKY_MAX_DEPTH environment variable.".format(MAX_DEPTH)) + f"MAX_DEPTH={MAX_DEPTH}. If this is intendend, you can change " + "this limit with the LOKY_MAX_DEPTH environment variable.") class LokyRecursionError(RuntimeError): @@ -918,7 +956,7 @@ class ShutdownExecutorError(RuntimeError): """ -class ProcessPoolExecutor(_base.Executor): +class ProcessPoolExecutor(Executor): _at_exit = None @@ -947,8 +985,7 @@ def __init__(self, max_workers=None, job_reducers=None, initargs: A tuple of arguments to pass to the initializer. env: A dict of environment variable to overwrite in the child process. The environment variables are set before any module is - loaded. Note that this only works with the loky context and it - is unreliable under windows with Python < 3.6. + loaded. Note that this only works with the loky context. """ _check_system_limits() @@ -964,11 +1001,9 @@ def __init__(self, max_workers=None, job_reducers=None, self._context = context self._env = env - if initializer is not None and not callable(initializer): - raise TypeError("initializer must be a callable") - self._initializer = initializer - self._initargs = initargs - + self._initializer, self._initargs = _prepare_initializer( + initializer, initargs + ) _check_max_depth(self._context) if result_reducers is None: @@ -1035,17 +1070,6 @@ def _start_executor_manager_thread(self): if self._executor_manager_thread is None: mp.util.debug('_start_executor_manager_thread called') - # When the executor gets garbarge collected, the weakref callback - # will wake up the queue management thread so that it can terminate - # if there is no pending work item. 
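Earlier in this hunk, _check_system_limits probes the number of POSIX semaphores the system provides; that probe boils down to something like this hypothetical sketch:

import os

def has_enough_semaphores(minimum=256):
    try:
        nsems_max = os.sysconf("SC_SEM_NSEMS_MAX")
    except (AttributeError, ValueError):
        # sysconf or this setting is not available: nothing to check.
        return True
    # -1 means the limit is indeterminate; treat it as high enough.
    return nsems_max == -1 or nsems_max >= minimum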
- def weakref_cb( - _, thread_wakeup=self._executor_manager_thread_wakeup, - shutdown_lock=self._shutdown_lock): - mp.util.debug('Executor collected: triggering callback for' - ' QueueManager wakeup') - with self._shutdown_lock: - thread_wakeup.wakeup() - # Start the processes so that their sentinels are known. self._executor_manager_thread = _ExecutorManagerThread(self) self._executor_manager_thread.start() @@ -1070,7 +1094,7 @@ def weakref_cb( _python_exit) def _adjust_process_count(self): - for _ in range(len(self._processes), self._max_workers): + while len(self._processes) < self._max_workers: worker_exit_lock = self._context.BoundedSemaphore(1) args = (self._call_queue, self._result_queue, self._initializer, self._initargs, self._processes_management_lock, @@ -1086,7 +1110,10 @@ def _adjust_process_count(self): p._worker_exit_lock = worker_exit_lock p.start() self._processes[p.pid] = p - mp.util.debug('Adjust process count : {}'.format(self._processes)) + mp.util.debug( + f"Adjusted process count to {self._max_workers}: " + f"{[(p.name, pid) for pid, p in self._processes.items()]}" + ) def _ensure_executor_running(self): """ensures all workers and management thread are running @@ -1110,7 +1137,7 @@ def submit(self, fn, *args, **kwargs): raise RuntimeError('cannot schedule new futures after ' 'interpreter shutdown') - f = _base.Future() + f = Future() w = _WorkItem(f, fn, args, kwargs) self._pending_work_items[self._queue_count] = w @@ -1121,7 +1148,7 @@ def submit(self, fn, *args, **kwargs): self._ensure_executor_running() return f - submit.__doc__ = _base.Executor.submit.__doc__ + submit.__doc__ = Executor.submit.__doc__ def map(self, fn, *iterables, **kwargs): """Returns an iterator equivalent to map(fn, iter). @@ -1150,13 +1177,14 @@ def map(self, fn, *iterables, **kwargs): if chunksize < 1: raise ValueError("chunksize must be >= 1.") - results = super(ProcessPoolExecutor, self).map( + results = super().map( partial(_process_chunk, fn), _get_chunks(chunksize, *iterables), - timeout=timeout) + timeout=timeout + ) return _chain_from_iterable_of_lists(results) def shutdown(self, wait=True, kill_workers=False): - mp.util.debug('shutting down executor %s' % self) + mp.util.debug(f'shutting down executor {self}') self._flags.flag_as_shutting_down(kill_workers) executor_manager_thread = self._executor_manager_thread @@ -1178,4 +1206,4 @@ def shutdown(self, wait=True, kill_workers=False): self._result_queue = None self._processes_management_lock = None - shutdown.__doc__ = _base.Executor.shutdown.__doc__ + shutdown.__doc__ = Executor.shutdown.__doc__ diff --git a/joblib/externals/loky/reusable_executor.py b/joblib/externals/loky/reusable_executor.py index 9a8e73f37..6b183a0bf 100644 --- a/joblib/externals/loky/reusable_executor.py +++ b/joblib/externals/loky/reusable_executor.py @@ -14,9 +14,6 @@ __all__ = ['get_reusable_executor'] -# Python 2 compat helper -STRING_TYPE = type("") - # Singleton executor and id management _executor_lock = threading.RLock() _next_executor_id = 0 @@ -79,7 +76,7 @@ def get_reusable_executor(max_workers=None, context=None, timeout=10, ``VAR`` are string literals to overwrite the environment variable ``ENV`` in the child processes to value ``VAL``. The environment variables are set in the children before any module is loaded. This only works with with the - ``loky`` context and it is unreliable on Windows with Python < 3.6. + ``loky`` context. 
""" _executor, _ = _ReusablePoolExecutor.get_reusable_executor( max_workers=max_workers, context=context, timeout=timeout, @@ -95,7 +92,7 @@ def __init__(self, submit_resize_lock, max_workers=None, context=None, timeout=None, executor_id=0, job_reducers=None, result_reducers=None, initializer=None, initargs=(), env=None): - super(_ReusablePoolExecutor, self).__init__( + super().__init__( max_workers=max_workers, context=context, timeout=timeout, job_reducers=job_reducers, result_reducers=result_reducers, initializer=initializer, initargs=initargs, env=env) @@ -118,10 +115,10 @@ def get_reusable_executor(cls, max_workers=None, context=None, timeout=10, max_workers = cpu_count() elif max_workers <= 0: raise ValueError( - "max_workers must be greater than 0, got {}." - .format(max_workers)) + f"max_workers must be greater than 0, got {max_workers}." + ) - if isinstance(context, STRING_TYPE): + if isinstance(context, str): context = get_context(context) if context is not None and context.get_start_method() == "fork": raise ValueError( @@ -135,8 +132,9 @@ def get_reusable_executor(cls, max_workers=None, context=None, timeout=10, env=env) if executor is None: is_reused = False - mp.util.debug("Create a executor with max_workers={}." - .format(max_workers)) + mp.util.debug( + f"Create a executor with max_workers={max_workers}." + ) executor_id = _get_next_executor_id() _executor_kwargs = kwargs _executor = executor = cls( @@ -154,9 +152,10 @@ def get_reusable_executor(cls, max_workers=None, context=None, timeout=10, else: reason = "arguments have changed" mp.util.debug( - "Creating a new executor with max_workers={} as the " - "previous instance cannot be reused ({})." - .format(max_workers, reason)) + "Creating a new executor with max_workers= " + f"{max_workers} as the previous instance cannot be " + f"reused ({reason})." + ) executor.shutdown(wait=True, kill_workers=kill_workers) _executor = executor = _executor_kwargs = None # Recursive call to build a new instance @@ -164,8 +163,8 @@ def get_reusable_executor(cls, max_workers=None, context=None, timeout=10, **kwargs) else: mp.util.debug( - "Reusing existing executor with max_workers={}." - .format(executor._max_workers) + "Reusing existing executor with " + f"max_workers={executor._max_workers}." ) is_reused = True executor._resize(max_workers) @@ -174,8 +173,7 @@ def get_reusable_executor(cls, max_workers=None, context=None, timeout=10, def submit(self, fn, *args, **kwargs): with self._submit_resize_lock: - return super(_ReusablePoolExecutor, self).submit( - fn, *args, **kwargs) + return super().submit(fn, *args, **kwargs) def _resize(self, max_workers): with self._submit_resize_lock: @@ -208,25 +206,28 @@ def _resize(self, max_workers): self._adjust_process_count() processes = list(self._processes.values()) - while not all([p.is_alive() for p in processes]): + while not all(p.is_alive() for p in processes): time.sleep(1e-3) def _wait_job_completion(self): """Wait for the cache to be empty before resizing the pool.""" # Issue a warning to the user about the bad effect of this usage. 
- if len(self._pending_work_items) > 0: + if self._pending_work_items: warnings.warn("Trying to resize an executor with running jobs: " "waiting for jobs completion before resizing.", UserWarning) - mp.util.debug("Executor {} waiting for jobs completion before" - " resizing".format(self.executor_id)) + mp.util.debug( + f"Executor {self.executor_id} waiting for jobs completion " + "before resizing" + ) # Wait for the completion of the jobs - while len(self._pending_work_items) > 0: + while self._pending_work_items: time.sleep(1e-3) def _setup_queues(self, job_reducers, result_reducers): # As this executor can be resized, use a large queue size to avoid # underestimating capacity and introducing overhead queue_size = 2 * cpu_count() + EXTRA_QUEUED_CALLS - super(_ReusablePoolExecutor, self)._setup_queues( - job_reducers, result_reducers, queue_size=queue_size) + super()._setup_queues( + job_reducers, result_reducers, queue_size=queue_size + ) diff --git a/joblib/func_inspect.py b/joblib/func_inspect.py index ec6bb4a2f..d334a2b9d 100644 --- a/joblib/func_inspect.py +++ b/joblib/func_inspect.py @@ -142,6 +142,13 @@ def get_func_name(func, resolv_alias=True, win_characters=True): # notebooks splitted = parts[-1].split('-') parts[-1] = '-'.join(splitted[:2] + splitted[3:]) + elif len(parts) > 2 and parts[-2].startswith('ipykernel_'): + # In a notebook session (ipykernel). Filename seems to be 'xyz' + # of above. parts[-2] has the structure ipykernel_XXXXXX where + # XXXXXX is a six-digit number identifying the current run (?). + # If we split it off, the function again has the same + # identifier across runs. + parts[-2] = 'ipykernel' filename = '-'.join(parts) if filename.endswith('.py'): filename = filename[:-3] @@ -171,10 +178,9 @@ def get_func_name(func, resolv_alias=True, win_characters=True): return module, name -def _signature_str(function_name, arg_spec): +def _signature_str(function_name, arg_sig): """Helper function to output a function signature""" - arg_spec_str = inspect.formatargspec(*arg_spec) - return '{}{}'.format(function_name, arg_spec_str) + return '{}{}'.format(function_name, arg_sig) def _function_called_str(function_name, args, kwargs): @@ -221,20 +227,34 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()): warnings.warn('Cannot inspect object %s, ignore list will ' 'not work.' 
% func, stacklevel=2) return {'*': args, '**': kwargs} - arg_spec = inspect.getfullargspec(func) - arg_names = arg_spec.args + arg_spec.kwonlyargs - arg_defaults = arg_spec.defaults or () - if arg_spec.kwonlydefaults: - arg_defaults = arg_defaults + tuple(arg_spec.kwonlydefaults[k] - for k in arg_spec.kwonlyargs - if k in arg_spec.kwonlydefaults) - arg_varargs = arg_spec.varargs - arg_varkw = arg_spec.varkw - + arg_sig = inspect.signature(func) + arg_names = [] + arg_defaults = [] + arg_kwonlyargs = [] + arg_varargs = None + arg_varkw = None + for param in arg_sig.parameters.values(): + if param.kind is param.POSITIONAL_OR_KEYWORD: + arg_names.append(param.name) + elif param.kind is param.KEYWORD_ONLY: + arg_names.append(param.name) + arg_kwonlyargs.append(param.name) + elif param.kind is param.VAR_POSITIONAL: + arg_varargs = param.name + elif param.kind is param.VAR_KEYWORD: + arg_varkw = param.name + if param.default is not param.empty: + arg_defaults.append(param.default) if inspect.ismethod(func): # First argument is 'self', it has been removed by Python # we need to add it back: args = [func.__self__, ] + args + # func is an instance method, inspect.signature(func) does not + # include self, we need to fetch it from the class method, i.e + # func.__func__ + class_method_sig = inspect.signature(func.__func__) + self_name = next(iter(class_method_sig.parameters)) + arg_names = [self_name] + arg_names # XXX: Maybe I need an inspect.isbuiltin to detect C-level methods, such # as on ndarrays. @@ -244,7 +264,7 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()): for arg_position, arg_name in enumerate(arg_names): if arg_position < len(args): # Positional argument or keyword argument given as positional - if arg_name not in arg_spec.kwonlyargs: + if arg_name not in arg_kwonlyargs: arg_dict[arg_name] = args[arg_position] else: raise ValueError( @@ -252,7 +272,7 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()): 'positional parameter for %s:\n' ' %s was called.' % (arg_name, - _signature_str(name, arg_spec), + _signature_str(name, arg_sig), _function_called_str(name, args, kwargs)) ) @@ -268,7 +288,7 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()): raise ValueError( 'Wrong number of arguments for %s:\n' ' %s was called.' - % (_signature_str(name, arg_spec), + % (_signature_str(name, arg_sig), _function_called_str(name, args, kwargs)) ) from e @@ -296,7 +316,7 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()): raise ValueError("Ignore list: argument '%s' is not defined for " "function %s" % (item, - _signature_str(name, arg_spec)) + _signature_str(name, arg_sig)) ) # XXX: Return a sorted list of pairs? return arg_dict diff --git a/joblib/hashing.py b/joblib/hashing.py index 24aeb559d..b983e84fa 100644 --- a/joblib/hashing.py +++ b/joblib/hashing.py @@ -193,7 +193,7 @@ def save(self, obj): obj_c_contiguous = obj.T else: # Cater for non-single-segment arrays: this creates a - # copy, and thus aleviates this issue. + # copy, and thus alleviates this issue. 
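The filter_args rewrite earlier in this hunk replaces inspect.getfullargspec with inspect.signature and classifies every parameter by its kind; a short worked example of what that classification yields:

import inspect

def f(a, b=1, *args, c, **kwargs):
    pass

kinds = {name: param.kind.name
         for name, param in inspect.signature(f).parameters.items()}
# {'a': 'POSITIONAL_OR_KEYWORD', 'b': 'POSITIONAL_OR_KEYWORD',
#  'args': 'VAR_POSITIONAL', 'c': 'KEYWORD_ONLY', 'kwargs': 'VAR_KEYWORD'}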
# XXX: There might be a more efficient way of doing this obj_c_contiguous = obj.flatten() diff --git a/joblib/memory.py b/joblib/memory.py index 424d9fea6..fdc58fdfd 100644 --- a/joblib/memory.py +++ b/joblib/memory.py @@ -19,7 +19,6 @@ import traceback import warnings import inspect -import sys import weakref from tokenize import open as open_py_source @@ -33,7 +32,6 @@ from ._store_backends import StoreBackendBase, FileSystemStoreBackend - FIRST_LINE_TEXT = "# first line:" # TODO: The following object should have a data store object as a sub @@ -132,11 +130,10 @@ def _store_backend_factory(backend, location, verbose=0, backend_options=None): return obj elif location is not None: warnings.warn( - "Instanciating a backend using a {} as a location is not " + "Instantiating a backend using a {} as a location is not " "supported by joblib. Returning None instead.".format( location.__class__.__name__), UserWarning) - return None @@ -199,7 +196,7 @@ class MemorizedResult(Logger): func: function or str function whose output is cached. The string case is intended only for - instanciation based on the output of repr() on another instance. + instantiation based on the output of repr() on another instance. (namely eval(repr(memorized_instance)) works). argument_hash: str @@ -361,6 +358,12 @@ def clear(self, warn=True): # Argument "warn" is for compatibility with MemorizedFunc.clear pass + def call(self, *args, **kwargs): + return self.func(*args, **kwargs) + + def check_call_in_cache(self, *args, **kwargs): + return False + ############################################################################### # class `MemorizedFunc` @@ -484,7 +487,7 @@ def _cached_call(self, args, kwargs, shelving=False): metadata = None msg = None - # Wether or not the memorized function must be called + # Whether or not the memorized function must be called must_call = False # FIXME: The statements below should be try/excepted @@ -560,8 +563,8 @@ def func_code_info(self): # (which should be called once on self) gets called in the process # in which self.func was defined, this caching mechanism prevents # undesired cache clearing when the cached function is called in - # an environement where the introspection utilities get_func_code - # relies on do not work (typicially, in joblib child processes). + # an environment where the introspection utilities get_func_code + # relies on do not work (typically, in joblib child processes). # See #1035 for more info # TODO (pierreglaser): do the same with get_func_name? self._func_code_info = get_func_code(self.func) @@ -606,6 +609,21 @@ def __getstate__(self): return state + def check_call_in_cache(self, *args, **kwargs): + """Check if function call is in the memory cache. + + Does not call the function or do any work besides func inspection + and arg hashing. + + Returns + ------- + is_call_in_cache: bool + Whether or not the result of the function has been cached + for the input arguments that have been passed. 
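A hypothetical usage sketch of this new method on a cached function (the cache directory is an arbitrary example path):

from joblib import Memory

memory = Memory("/tmp/joblib_example_cache", verbose=0)

@memory.cache
def heavy(x):
    return x ** 2

heavy.check_call_in_cache(3)   # False: nothing cached for these arguments yet
heavy(3)                       # computes and stores 9 in the cache
heavy.check_call_in_cache(3)   # True: a cached result now exists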
+ """ + func_id, args_id = self._get_output_identifiers(*args, **kwargs) + return self.store_backend.contains_item((func_id, args_id)) + # ------------------------------------------------------------------------ # Private interface # ------------------------------------------------------------------------ @@ -683,8 +701,8 @@ def _check_previous_func_code(self, stacklevel=2): extract_first_line( self.store_backend.get_cached_func_code([func_id])) except (IOError, OSError): # some backend can also raise OSError - self._write_func_code(func_code, first_line) - return False + self._write_func_code(func_code, first_line) + return False if old_func_code == func_code: return True @@ -821,8 +839,6 @@ def _persist_input(self, duration, args, kwargs, this_duration_limit=0.5): % this_duration, stacklevel=5) return metadata - # XXX: Need a method to check if results are available. - # ------------------------------------------------------------------------ # Private `object` interface # ------------------------------------------------------------------------ @@ -848,7 +864,7 @@ class Memory(Logger): Parameters ---------- - location: str or None + location: str, pathlib.Path or None The path of the base directory to use as a data store or None. If None is given, no caching is done and the Memory object is completely transparent. This option @@ -860,12 +876,6 @@ class Memory(Logger): The 'local' backend is using regular filesystem operations to manipulate data (open, mv, etc) in the backend. - cachedir: str or None, optional - - .. deprecated: 0.12 - 'cachedir' has been deprecated in 0.12 and will be - removed in 0.14. Use the 'location' parameter instead. - mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional The memmapping mode used when loading from cache numpy arrays. See numpy.load for the meaning of the @@ -882,20 +892,24 @@ class Memory(Logger): as functions are evaluated. bytes_limit: int, optional - Limit in bytes of the size of the cache. + Limit in bytes of the size of the cache. By default, the size of + the cache is unlimited. When reducing the size of the cache, + ``joblib`` keeps the most recently accessed items first. + + **Note:** You need to call :meth:`joblib.Memory.reduce_size` to + actually reduce the cache size to be less than ``bytes_limit``. backend_options: dict, optional - Contains a dictionnary of named parameters used to configure + Contains a dictionary of named parameters used to configure the store backend. """ # ------------------------------------------------------------------------ # Public interface # ------------------------------------------------------------------------ - def __init__(self, location=None, backend='local', cachedir=None, + def __init__(self, location=None, backend='local', mmap_mode=None, compress=False, verbose=1, bytes_limit=None, backend_options=None): - # XXX: Bad explanation of the None value of cachedir Logger.__init__(self) self._verbose = verbose self.mmap_mode = mmap_mode @@ -910,22 +924,6 @@ def __init__(self, location=None, backend='local', cachedir=None, if compress and mmap_mode is not None: warnings.warn('Compressed results cannot be memmapped', stacklevel=2) - if cachedir is not None: - if location is not None: - raise ValueError( - 'You set both "location={0!r} and "cachedir={1!r}". 
' - "'cachedir' has been deprecated in version " - "0.12 and will be removed in version 0.14.\n" - 'Please only set "location={0!r}"'.format( - location, cachedir)) - - warnings.warn( - "The 'cachedir' parameter has been deprecated in version " - "0.12 and will be removed in version 0.14.\n" - 'You provided "cachedir={0!r}", ' - 'use "location={0!r}" instead.'.format(cachedir), - DeprecationWarning, stacklevel=2) - location = cachedir self.location = location if isinstance(location, str): @@ -936,17 +934,6 @@ def __init__(self, location=None, backend='local', cachedir=None, backend_options=dict(compress=compress, mmap_mode=mmap_mode, **backend_options)) - @property - def cachedir(self): - warnings.warn( - "The 'cachedir' attribute has been deprecated in version 0.12 " - "and will be removed in version 0.14.\n" - "Use os.path.join(memory.location, 'joblib') attribute instead.", - DeprecationWarning, stacklevel=2) - if self.location is None: - return None - return os.path.join(self.location, 'joblib') - def cache(self, func=None, ignore=None, verbose=None, mmap_mode=False): """ Decorates the given function func to only compute its return value for input arguments not cached on disk. @@ -1000,6 +987,12 @@ def clear(self, warn=True): if self.store_backend is not None: self.store_backend.clear() + # As the cache in completely clear, make sure the _FUNCTION_HASHES + # cache is also reset. Else, for a function that is present in this + # table, results cached after this clear will be have cache miss + # as the function code is not re-written. + _FUNCTION_HASHES.clear() + def reduce_size(self): """Remove cache elements to make cache size fit in ``bytes_limit``.""" if self.bytes_limit is not None and self.store_backend is not None: diff --git a/joblib/numpy_pickle.py b/joblib/numpy_pickle.py index 93e5537ea..fa450fbba 100644 --- a/joblib/numpy_pickle.py +++ b/joblib/numpy_pickle.py @@ -7,10 +7,8 @@ import pickle import os import warnings -try: - from pathlib import Path -except ImportError: - Path = None +import io +from pathlib import Path from .compressor import lz4, LZ4_NOT_INSTALLED_ERROR from .compressor import _COMPRESSORS, register_compressor, BinaryZlibFile @@ -20,6 +18,7 @@ from .numpy_pickle_utils import Unpickler, Pickler from .numpy_pickle_utils import _read_fileobject, _write_fileobject from .numpy_pickle_utils import _read_bytes, BUFFER_SIZE +from .numpy_pickle_utils import _ensure_native_byte_order from .numpy_pickle_compat import load_compatibility from .numpy_pickle_compat import NDArrayWrapper # For compatibility with old versions of joblib, we need ZNDArrayWrapper @@ -41,6 +40,11 @@ ############################################################################### # Utility objects for persistence. +# For convenience, 16 bytes are used to be sure to cover all the possible +# dtypes' alignments. For reference, see: +# https://numpy.org/devdocs/dev/alignment.html +NUMPY_ARRAY_ALIGNMENT_BYTES = 16 + class NumpyArrayWrapper(object): """An object to be persisted instead of numpy arrays. @@ -72,13 +76,23 @@ class NumpyArrayWrapper(object): Default: False. """ - def __init__(self, subclass, shape, order, dtype, allow_mmap=False): + def __init__(self, subclass, shape, order, dtype, allow_mmap=False, + numpy_array_alignment_bytes=NUMPY_ARRAY_ALIGNMENT_BYTES): """Constructor. 
Store the useful information for later.""" self.subclass = subclass self.shape = shape self.order = order self.dtype = dtype self.allow_mmap = allow_mmap + # We make numpy_array_alignment_bytes an instance attribute to allow us + # to change our mind about the default alignment and still load the old + # pickles (with the previous alignment) correctly + self.numpy_array_alignment_bytes = numpy_array_alignment_bytes + + def safe_get_numpy_array_alignment_bytes(self): + # NumpyArrayWrapper instances loaded from joblib <= 1.1 pickles don't + # have an numpy_array_alignment_bytes attribute + return getattr(self, 'numpy_array_alignment_bytes', None) def write_array(self, array, pickler): """Write array bytes to pickler file handle. @@ -94,6 +108,23 @@ def write_array(self, array, pickler): # pickle protocol. pickle.dump(array, pickler.file_handle, protocol=2) else: + numpy_array_alignment_bytes = \ + self.safe_get_numpy_array_alignment_bytes() + if numpy_array_alignment_bytes is not None: + current_pos = pickler.file_handle.tell() + pos_after_padding_byte = current_pos + 1 + padding_length = numpy_array_alignment_bytes - ( + pos_after_padding_byte % numpy_array_alignment_bytes) + # A single byte is written that contains the padding length in + # bytes + padding_length_byte = int.to_bytes( + padding_length, length=1, byteorder='little') + pickler.file_handle.write(padding_length_byte) + + if padding_length != 0: + padding = b'\xff' * padding_length + pickler.file_handle.write(padding) + for chunk in pickler.np.nditer(array, flags=['external_loop', 'buffered', @@ -120,6 +151,15 @@ def read_array(self, unpickler): # The array contained Python objects. We need to unpickle the data. array = pickle.load(unpickler.file_handle) else: + numpy_array_alignment_bytes = \ + self.safe_get_numpy_array_alignment_bytes() + if numpy_array_alignment_bytes is not None: + padding_byte = unpickler.file_handle.read(1) + padding_length = int.from_bytes( + padding_byte, byteorder='little') + if padding_length != 0: + unpickler.file_handle.read(padding_length) + # This is not a real file. We have to read it the # memory-intensive way. # crc32 module fails on reads greater than 2 ** 32 bytes, @@ -147,11 +187,22 @@ def read_array(self, unpickler): else: array.shape = self.shape - return array + # Detect byte order mismatch and swap as needed. + return _ensure_native_byte_order(array) def read_mmap(self, unpickler): """Read an array using numpy memmap.""" - offset = unpickler.file_handle.tell() + current_pos = unpickler.file_handle.tell() + offset = current_pos + numpy_array_alignment_bytes = \ + self.safe_get_numpy_array_alignment_bytes() + + if numpy_array_alignment_bytes is not None: + padding_byte = unpickler.file_handle.read(1) + padding_length = int.from_bytes(padding_byte, byteorder='little') + # + 1 is for the padding byte + offset += padding_length + 1 + if unpickler.mmap_mode == 'w+': unpickler.mmap_mode = 'r+' @@ -164,6 +215,20 @@ def read_mmap(self, unpickler): # update the offset so that it corresponds to the end of the read array unpickler.file_handle.seek(offset + marray.nbytes) + if (numpy_array_alignment_bytes is None and + current_pos % NUMPY_ARRAY_ALIGNMENT_BYTES != 0): + message = ( + f'The memmapped array {marray} loaded from the file ' + f'{unpickler.file_handle.name} is not not bytes aligned. ' + 'This may cause segmentation faults if this memmapped array ' + 'is used in some libraries like BLAS or PyTorch. ' + 'To get rid of this warning, regenerate your pickle file ' + 'with joblib >= 1.2.0. 
' + 'See https://github.com/joblib/joblib/issues/563 ' + 'for more details' + ) + warnings.warn(message) + return marray def read(self, unpickler): @@ -240,9 +305,17 @@ def _create_array_wrapper(self, array): order = 'F' if (array.flags.f_contiguous and not array.flags.c_contiguous) else 'C' allow_mmap = not self.buffered and not array.dtype.hasobject + + kwargs = {} + try: + self.file_handle.tell() + except io.UnsupportedOperation: + kwargs = {'numpy_array_alignment_bytes': None} + wrapper = NumpyArrayWrapper(type(array), array.shape, order, array.dtype, - allow_mmap=allow_mmap) + allow_mmap=allow_mmap, + **kwargs) return wrapper diff --git a/joblib/numpy_pickle_compat.py b/joblib/numpy_pickle_compat.py index 6541a066a..5316c0225 100644 --- a/joblib/numpy_pickle_compat.py +++ b/joblib/numpy_pickle_compat.py @@ -9,7 +9,7 @@ from .numpy_pickle_utils import _ZFILE_PREFIX from .numpy_pickle_utils import Unpickler - +from .numpy_pickle_utils import _ensure_native_byte_order def hex_str(an_int): """Convert an int to an hexadecimal string.""" @@ -63,7 +63,7 @@ def write_zfile(file_handle, data, compress=1): """Write the data in the given file as a Z-file. Z-files are raw data compressed with zlib used internally by joblib - for persistence. Backward compatibility is not guarantied. Do not + for persistence. Backward compatibility is not guaranteed. Do not use for external purposes. """ file_handle.write(_ZFILE_PREFIX) @@ -105,6 +105,9 @@ def read(self, unpickler): kwargs["allow_pickle"] = True array = unpickler.np.load(filename, **kwargs) + # Detect byte order mismatch and swap as needed. + array = _ensure_native_byte_order(array) + # Reconstruct subclasses. This does not work with old # versions of numpy if (hasattr(array, '__array_prepare__') and diff --git a/joblib/numpy_pickle_utils.py b/joblib/numpy_pickle_utils.py index a50105547..71f2c7c59 100644 --- a/joblib/numpy_pickle_utils.py +++ b/joblib/numpy_pickle_utils.py @@ -6,6 +6,7 @@ import pickle import io +import sys import warnings import contextlib @@ -48,6 +49,30 @@ def _get_prefixes_max_len(): return max(prefixes) +def _is_numpy_array_byte_order_mismatch(array): + """Check if numpy array is having byte order mismatch""" + return ((sys.byteorder == 'big' and + (array.dtype.byteorder == '<' or + (array.dtype.byteorder == '|' and array.dtype.fields and + all(e[0].byteorder == '<' + for e in array.dtype.fields.values())))) or + (sys.byteorder == 'little' and + (array.dtype.byteorder == '>' or + (array.dtype.byteorder == '|' and array.dtype.fields and + all(e[0].byteorder == '>' + for e in array.dtype.fields.values()))))) + + +def _ensure_native_byte_order(array): + """Use the byte order of the host while preserving values + + Does nothing if array already uses the system byte order. + """ + if _is_numpy_array_byte_order_mismatch(array): + array = array.byteswap().newbyteorder('=') + return array + + ############################################################################### # Cache file utilities def _detect_compressor(fileobj): diff --git a/joblib/parallel.py b/joblib/parallel.py index 17a9f2313..6e7b1b19a 100644 --- a/joblib/parallel.py +++ b/joblib/parallel.py @@ -27,7 +27,7 @@ ThreadingBackend, SequentialBackend, LokyBackend) from .externals.cloudpickle import dumps, loads -from .externals import loky +from ._utils import eval_expr # Make sure that those two classes are part of the public joblib.parallel API # so that 3rd party backend implementers can import them from here. 
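Returning to the alignment change in numpy_pickle.py above: a single padding-length byte is written first, then enough padding bytes so that the array data itself starts on a 16-byte boundary. A small worked example of that arithmetic:

NUMPY_ARRAY_ALIGNMENT_BYTES = 16

def padding_length(current_pos, alignment=NUMPY_ARRAY_ALIGNMENT_BYTES):
    # One byte is always reserved for the padding-length marker itself.
    pos_after_padding_byte = current_pos + 1
    return alignment - (pos_after_padding_byte % alignment)

padding_length(13)   # 2  -> array data starts at offset 13 + 1 + 2 = 16
padding_length(31)   # 16 -> array data starts at offset 31 + 1 + 16 = 48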
@@ -36,15 +36,28 @@ BACKENDS = { - 'multiprocessing': MultiprocessingBackend, 'threading': ThreadingBackend, 'sequential': SequentialBackend, - 'loky': LokyBackend, } # name of the backend used by default by Parallel outside of any context # managed by ``parallel_backend``. -DEFAULT_BACKEND = 'loky' + +# threading is the only backend that is always everywhere +DEFAULT_BACKEND = 'threading' + DEFAULT_N_JOBS = 1 + +MAYBE_AVAILABLE_BACKENDS = {'multiprocessing', 'loky'} + +# if multiprocessing is available, so is loky, we set it as the default +# backend +if mp is not None: + BACKENDS['multiprocessing'] = MultiprocessingBackend + from .externals import loky + BACKENDS['loky'] = LokyBackend + DEFAULT_BACKEND = 'loky' + + DEFAULT_THREAD_BACKEND = 'threading' # Thread local value that can be overridden by the ``parallel_backend`` context @@ -123,7 +136,7 @@ class parallel_backend(object): """Change the default backend used by Parallel inside a with block. If ``backend`` is a string it must match a previously registered - implementation using the ``register_parallel_backend`` function. + implementation using the :func:`~register_parallel_backend` function. By default the following backends are available: @@ -135,7 +148,9 @@ class parallel_backend(object): 'threading' is a low-overhead alternative that is most efficient for functions that release the Global Interpreter Lock: e.g. I/O-bound code or CPU-bound code in a few calls to native code that explicitly releases the - GIL. + GIL. Note that on some rare systems (such as pyiodine), + multiprocessing and loky may not be available, in which case joblib + defaults to threading. In addition, if the `dask` and `distributed` Python packages are installed, it is possible to use the 'dask' backend for better scheduling of nested @@ -158,9 +173,9 @@ class parallel_backend(object): caller passes an explicit value for the ``n_jobs`` parameter. This is an alternative to passing a ``backend='backend_name'`` argument to - the ``Parallel`` class constructor. It is particularly useful when calling - into library code that uses joblib internally but does not expose the - backend argument in its own API. + the :class:`~Parallel` class constructor. It is particularly useful when + calling into library code that uses joblib internally but does not expose + the backend argument in its own API. >>> from operator import neg >>> with parallel_backend('threading'): @@ -184,9 +199,20 @@ class parallel_backend(object): def __init__(self, backend, n_jobs=-1, inner_max_num_threads=None, **backend_params): if isinstance(backend, str): - if backend not in BACKENDS and backend in EXTERNAL_BACKENDS: - register = EXTERNAL_BACKENDS[backend] - register() + if backend not in BACKENDS: + if backend in EXTERNAL_BACKENDS: + register = EXTERNAL_BACKENDS[backend] + register() + elif backend in MAYBE_AVAILABLE_BACKENDS: + warnings.warn( + f"joblib backend '{backend}' is not available on " + f"your system, falling back to {DEFAULT_BACKEND}.", + UserWarning, + stacklevel=2) + BACKENDS[backend] = BACKENDS[DEFAULT_BACKEND] + else: + raise ValueError("Invalid backend: %s, expected one of %r" + % (backend, sorted(BACKENDS.keys()))) backend = BACKENDS[backend](**backend_params) @@ -364,8 +390,8 @@ def register_parallel_backend(name, factory, make_default=False): """Register a new Parallel backend factory. The new backend can then be selected by passing its name as the backend - argument to the Parallel class. 
Moreover, the default backend can be - overwritten globally by setting make_default=True. + argument to the :class:`~Parallel` class. Moreover, the default backend can + be overwritten globally by setting make_default=True. The factory can be any callable that takes no argument and return an instance of ``ParallelBackendBase``. @@ -428,15 +454,17 @@ class Parallel(Logger): CPUs but one are used. None is a marker for 'unset' that will be interpreted as n_jobs=1 (sequential execution) unless the call is performed under a - parallel_backend context manager that sets another value for - n_jobs. + :func:`~parallel_backend` context manager that sets another value + for n_jobs. backend: str, ParallelBackendBase instance or None, default: 'loky' Specify the parallelization backend implementation. Supported backends are: - "loky" used by default, can induce some communication and memory overhead when exchanging input and - output data with the worker Python processes. + output data with the worker Python processes. On some rare + systems (such as Pyiodide), the loky backend may not be + available. - "multiprocessing" previous process-based backend based on `multiprocessing.Pool`. Less robust than `loky`. - "threading" is a very low-overhead backend but it suffers @@ -447,18 +475,18 @@ class Parallel(Logger): in a "with nogil" block or an expensive call to a library such as NumPy). - finally, you can register backends by calling - register_parallel_backend. This will allow you to implement - a backend of your liking. + :func:`~register_parallel_backend`. This will allow you to + implement a backend of your liking. It is not recommended to hard-code the backend name in a call to - Parallel in a library. Instead it is recommended to set soft hints - (prefer) or hard constraints (require) so as to make it possible - for library users to change the backend from the outside using the - parallel_backend context manager. + :class:`~Parallel` in a library. Instead it is recommended to set + soft hints (prefer) or hard constraints (require) so as to make it + possible for library users to change the backend from the outside + using the :func:`~parallel_backend` context manager. prefer: str in {'processes', 'threads'} or None, default: None Soft hint to choose the default backend if no specific backend - was selected with the parallel_backend context manager. The - default process-based backend is 'loky' and the default + was selected with the :func:`~parallel_backend` context manager. + The default process-based backend is 'loky' and the default thread-based backend is 'threading'. Ignored if the ``backend`` parameter is specified. require: 'sharedmem' or None, default None @@ -477,7 +505,9 @@ class Parallel(Logger): pre_dispatch: {'all', integer, or expression, as in '3*n_jobs'} The number of batches (of tasks) to be pre-dispatched. Default is '2*n_jobs'. When batch_size="auto" this is reasonable - default and the workers should never starve. + default and the workers should never starve. Note that only basic + arithmetics are allowed here and no modules can be used in this + expression. batch_size: int or 'auto', default: 'auto' The number of atomic tasks to dispatch at once to each worker. When individual evaluations are very fast, dispatching @@ -513,9 +543,11 @@ class Parallel(Logger): in Bytes, or a human-readable string, e.g., '1M' for 1 megabyte. Use None to disable memmapping of large arrays. Only active when backend="loky" or "multiprocessing". 
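A hypothetical illustration of the soft-hint guidance above: a library asks for threads without hard-coding a backend, and a caller can still override that choice from the outside with the parallel_backend context manager.

from math import sqrt
from joblib import Parallel, delayed, parallel_backend

def library_helper(values):
    # Soft hint only: applies when no backend was selected from the outside.
    return Parallel(n_jobs=2, prefer="threads")(
        delayed(sqrt)(v) for v in values)

library_helper(range(4))                  # runs with the thread-based backend
with parallel_backend("loky", n_jobs=2):
    library_helper(range(4))              # the caller-selected backend wins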
- mmap_mode: {None, 'r+', 'r', 'w+', 'c'} - Memmapping mode for numpy arrays passed to workers. - See 'max_nbytes' parameter documentation for more details. + mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, default: 'r' + Memmapping mode for numpy arrays passed to workers. None will + disable memmapping, other modes defined in the numpy.memmap doc: + https://numpy.org/doc/stable/reference/generated/numpy.memmap.html + Also, see 'max_nbytes' parameter documentation for more details. Notes ----- @@ -688,6 +720,16 @@ def __init__(self, n_jobs=None, backend=None, verbose=0, timeout=None, # preload modules on the forkserver helper process. self._backend_args['context'] = backend backend = MultiprocessingBackend(nesting_level=nesting_level) + + elif backend not in BACKENDS and backend in MAYBE_AVAILABLE_BACKENDS: + warnings.warn( + f"joblib backend '{backend}' is not available on " + f"your system, falling back to {DEFAULT_BACKEND}.", + UserWarning, + stacklevel=2) + BACKENDS[backend] = BACKENDS[DEFAULT_BACKEND] + backend = BACKENDS[DEFAULT_BACKEND](nesting_level=nesting_level) + else: try: backend_factory = BACKENDS[backend] @@ -1010,7 +1052,9 @@ def _batched_calls_reducer_callback(): else: self._original_iterator = iterator if hasattr(pre_dispatch, 'endswith'): - pre_dispatch = eval(pre_dispatch) + pre_dispatch = eval_expr( + pre_dispatch.replace("n_jobs", str(n_jobs)) + ) self._pre_dispatch_amount = pre_dispatch = int(pre_dispatch) # The main thread will consume the first pre_dispatch items and diff --git a/joblib/test/data/create_numpy_pickle.py b/joblib/test/data/create_numpy_pickle.py index 0128f91ed..3a3a311fe 100644 --- a/joblib/test/data/create_numpy_pickle.py +++ b/joblib/test/data/create_numpy_pickle.py @@ -64,7 +64,7 @@ def write_test_pickle(to_pickle, args): print("Error: cannot generate file '{}' with arguments '{}'. " "Error was: {}".format(pickle_filename, kwargs, e)) else: - print("File '{}' generated successfuly.".format(pickle_filename)) + print("File '{}' generated successfully.".format(pickle_filename)) if __name__ == '__main__': import argparse diff --git a/joblib/test/test_cloudpickle_wrapper.py b/joblib/test/test_cloudpickle_wrapper.py new file mode 100644 index 000000000..733f51c72 --- /dev/null +++ b/joblib/test/test_cloudpickle_wrapper.py @@ -0,0 +1,27 @@ +""" +Test that our implementation of wrap_non_picklable_objects mimics +properly the loky implementation. 
+""" + +from .._cloudpickle_wrapper import wrap_non_picklable_objects +from .._cloudpickle_wrapper import my_wrap_non_picklable_objects + + +def a_function(x): + return x + + +class AClass(object): + + def __call__(self, x): + return x + + +def test_wrap_non_picklable_objects(): + # Mostly a smoke test: test that we can use callable in the same way + # with both our implementation of wrap_non_picklable_objects and the + # upstream one + for obj in (a_function, AClass()): + wrapped_obj = wrap_non_picklable_objects(obj) + my_wrapped_obj = my_wrap_non_picklable_objects(obj) + assert wrapped_obj(1) == my_wrapped_obj(1) diff --git a/joblib/test/test_dask.py b/joblib/test/test_dask.py index feb112040..9f072a128 100644 --- a/joblib/test/test_dask.py +++ b/joblib/test/test_dask.py @@ -1,5 +1,6 @@ from __future__ import print_function, division, absolute_import import os +import warnings import pytest from random import random @@ -11,6 +12,7 @@ from .._dask import DaskDistributedBackend distributed = pytest.importorskip('distributed') +dask = pytest.importorskip('dask') from distributed import Client, LocalCluster, get_client from distributed.metrics import time from distributed.utils_test import cluster, inc @@ -114,7 +116,7 @@ def f(dask_scheduler): def test_no_undesired_distributed_cache_hit(loop): # Dask has a pickle cache for callables that are called many times. Because - # the dask backends used to wrapp both the functions and the arguments + # the dask backends used to wrap both the functions and the arguments # under instances of the Batch callable class this caching mechanism could # lead to bugs as described in: https://github.com/joblib/joblib/pull/1055 # The joblib-dask backend has been refactored to avoid bundling the @@ -462,3 +464,28 @@ def test_wait_for_workers_timeout(): finally: client.close() cluster.close() + + +@pytest.mark.parametrize("backend", ["loky", "multiprocessing"]) +def test_joblib_warning_inside_dask_daemonic_worker(backend): + cluster = LocalCluster(n_workers=2) + client = Client(cluster) + + def func_using_joblib_parallel(): + # Somehow trying to check the warning type here (e.g. with + # pytest.warns(UserWarning)) make the test hang. Work-around: return + # the warning record to the client and the warning check is done + # client-side. + with warnings.catch_warnings(record=True) as record: + Parallel(n_jobs=2, backend=backend)( + delayed(inc)(i) for i in range(10)) + + return record + + fut = client.submit(func_using_joblib_parallel) + record = fut.result() + + assert len(record) == 1 + warning = record[0].message + assert isinstance(warning, UserWarning) + assert "distributed.worker.daemon" in str(warning) diff --git a/joblib/test/test_deprecated_objects.py b/joblib/test/test_deprecated_objects.py index d561483ee..9ca6b0882 100644 --- a/joblib/test/test_deprecated_objects.py +++ b/joblib/test/test_deprecated_objects.py @@ -2,15 +2,12 @@ Tests making sure that deprecated objects properly raise a deprecation warning when imported/created. 
""" -import sys - import pytest from joblib.my_exceptions import _deprecated_names as _deprecated_exceptions from joblib.format_stack import _deprecated_names as _deprecated_format_utils -@pytest.mark.xfail(sys.version_info < (3, 7), reason="no module-level getattr") def test_deprecated_joblib_exceptions(): assert 'JoblibException' in _deprecated_exceptions for name in _deprecated_exceptions: @@ -20,7 +17,6 @@ def test_deprecated_joblib_exceptions(): exec('from joblib.my_exceptions import {}'.format(name)) -@pytest.mark.xfail(sys.version_info < (3, 7), reason="no module-level getattr") def test_deprecated_formatting_utilities(capsys): assert 'safe_repr' in _deprecated_format_utils assert 'eq_repr' in _deprecated_format_utils diff --git a/joblib/test/test_hashing.py b/joblib/test/test_hashing.py index 37d9480ac..3a3d6316c 100644 --- a/joblib/test/test_hashing.py +++ b/joblib/test/test_hashing.py @@ -64,7 +64,7 @@ def f(self, x): class KlassWithCachedMethod(object): def __init__(self, cachedir): - mem = Memory(cachedir=cachedir) + mem = Memory(location=cachedir) self.f = mem.cache(self.f) def f(self, x): @@ -260,8 +260,8 @@ def test_numpy_scalar(): def test_dict_hash(tmpdir): - # Check that dictionaries hash consistently, eventhough the ordering - # of the keys is not garanteed + # Check that dictionaries hash consistently, even though the ordering + # of the keys is not guaranteed k = KlassWithCachedMethod(tmpdir.strpath) d = {'#s12069__c_maps.nii.gz': [33], diff --git a/joblib/test/test_memmapping.py b/joblib/test/test_memmapping.py index dc40d23f8..bdc825f06 100644 --- a/joblib/test/test_memmapping.py +++ b/joblib/test/test_memmapping.py @@ -9,6 +9,8 @@ import subprocess import threading +import pytest + from joblib.test.common import with_numpy, np from joblib.test.common import setup_autokill from joblib.test.common import teardown_autokill @@ -83,7 +85,7 @@ def test_memmap_based_array_reducing(tmpdir): buffer[:] = - 1.0 * np.arange(buffer.shape[0], dtype=buffer.dtype) buffer.flush() - # Memmap a 2D fortran array on a offseted subsection of the previous + # Memmap a 2D fortran array on a offsetted subsection of the previous # buffer a = np.memmap(filename, dtype=np.float64, shape=(3, 5, 4), mode='r+', order='F', offset=4) @@ -146,7 +148,8 @@ def reconstruct_array_or_memmap(x): assert_array_equal(b3_reconstructed, b3) -@skipif(sys.platform != "win32", +@with_multiprocessing +@skipif((sys.platform != "win32") or (), reason="PermissionError only easily triggerable on Windows") def test_resource_tracker_retries_when_permissionerror(tmpdir): # Test resource_tracker retry mechanism when unlinking memmaps. See more @@ -355,6 +358,7 @@ def test_pool_with_memmap_array_view(factory, tmpdir): @with_numpy +@with_multiprocessing @parametrize("backend", ["multiprocessing", "loky"]) def test_permission_error_windows_reference_cycle(backend): # Non regression test for: @@ -389,6 +393,7 @@ def test_permission_error_windows_reference_cycle(backend): @with_numpy +@with_multiprocessing @parametrize("backend", ["multiprocessing", "loky"]) def test_permission_error_windows_memmap_sent_to_parent(backend): # Second non-regression test for: @@ -581,39 +586,6 @@ def parallel_raise(array, temp_dirs): assert b"resource_tracker" not in err, err.decode() -@with_numpy -@with_multiprocessing -def test_nested_loop_error_in_grandchild_resource_tracker_silent(): - # Safety smoke test: test that nested parallel calls using the loky backend - # don't yield noisy resource_tracker outputs when the grandchild errors - # out. 
- cmd = '''if 1: - from joblib import Parallel, delayed - - - def raise_error(i): - raise ValueError - - - def nested_loop(f): - Parallel(backend="loky", n_jobs=2)( - delayed(f)(i) for i in range(10) - ) - - - if __name__ == "__main__": - Parallel(backend="loky", n_jobs=2)( - delayed(nested_loop)(func) for func in [raise_error] - ) - ''' - p = subprocess.Popen([sys.executable, '-c', cmd], - stderr=subprocess.PIPE, stdout=subprocess.PIPE) - p.wait() - out, err = p.communicate() - assert p.returncode == 1, out.decode() - assert b"resource_tracker" not in err, err.decode() - - @with_numpy @with_multiprocessing @parametrize("backend", ["multiprocessing", "loky"]) @@ -641,29 +613,25 @@ def test_many_parallel_calls_on_same_object(backend): delayed(return_slice_of_data)(data, 0, 20) for _ in range(10) ) - slice_of_data = Parallel( - n_jobs=2, max_nbytes=1, backend='{b}')( - delayed(return_slice_of_data)(data, 0, 20) - for _ in range(10) - ) '''.format(b=backend) - - for _ in range(3): - env = os.environ.copy() - env['PYTHONPATH'] = os.path.dirname(__file__) - p = subprocess.Popen([sys.executable, '-c', cmd], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, env=env) - p.wait() - out, err = p.communicate() - assert p.returncode == 0, err - assert out == b'' - if sys.version_info[:3] not in [(3, 8, 0), (3, 8, 1)]: - # In early versions of Python 3.8, a reference leak - # https://github.com/cloudpipe/cloudpickle/issues/327, holds - # references to pickled objects, generating race condition during - # cleanup finalizers of joblib and noisy resource_tracker outputs. - assert b'resource_tracker' not in err + env = os.environ.copy() + env['PYTHONPATH'] = os.path.dirname(__file__) + p = subprocess.Popen( + [sys.executable, '-c', cmd], + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + env=env, + ) + p.wait() + out, err = p.communicate() + assert p.returncode == 0, err + assert out == b'' + if sys.version_info[:3] not in [(3, 8, 0), (3, 8, 1)]: + # In early versions of Python 3.8, a reference leak + # https://github.com/cloudpipe/cloudpickle/issues/327, holds + # references to pickled objects, generating race condition during + # cleanup finalizers of joblib and noisy resource_tracker outputs. + assert b'resource_tracker' not in err @with_numpy @@ -679,7 +647,7 @@ def test_memmap_returned_as_regular_array(backend): @with_numpy @with_multiprocessing -@parametrize("backend", ["multiprocessing", param("loky", marks=xfail)]) +@parametrize("backend", ["multiprocessing", "loky"]) def test_resource_tracker_silent_when_reference_cycles(backend): # There is a variety of reasons that can make joblib with loky backend # output noisy warnings when a reference cycle is preventing a memmap from @@ -687,10 +655,22 @@ def test_resource_tracker_silent_when_reference_cycles(backend): # deletes the temporary folder if it was not done before, which can # interact badly with the resource_tracker. We don't risk leaking any # resources, but this will likely make joblib output a lot of low-level - # confusing messages. This test is marked as xfail for now: but a next PR - # should fix this behavior. + # confusing messages. + # + # This test makes sure that the resource_tracker is silent when a reference + # has been collected concurrently on non-Windows platforms. + # # Note that the script in ``cmd`` is the exact same script as in # test_permission_error_windows_reference_cycle. 
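The memmapping tests above (including the ``return_slice_of_data`` subprocess script) rely on joblib automatically dumping large array arguments to temporary memmaps. A small sketch of that user-facing behaviour; ``received_type`` is a made-up helper and ``max_nbytes=1`` is only used, as in the tests, to force memmapping of a modest array:

import numpy as np
from joblib import Parallel, delayed

def received_type(a):
    # Workers see a numpy.memmap once the argument exceeds max_nbytes.
    return type(a).__name__

data = np.random.rand(1000)
print(Parallel(n_jobs=2, max_nbytes=1, backend='loky')(
    delayed(received_type)(data) for _ in range(3)))
# Expected on a system where the loky backend is available:
# ['memmap', 'memmap', 'memmap']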
+ if backend == "loky" and sys.platform.startswith('win'): + # XXX: on Windows, reference cycles can delay timely garbage collection + # and make it impossible to properly delete the temporary folder in the + # main process because of permission errors. + pytest.xfail( + "The temporary folder cannot be deleted on Windows in the " + "presence of a reference cycle" + ) + cmd = """if 1: import numpy as np from joblib import Parallel, delayed @@ -714,8 +694,10 @@ def test_resource_tracker_silent_when_reference_cycles(backend): stdout=subprocess.PIPE) p.wait() out, err = p.communicate() - assert p.returncode == 0, out.decode() - assert b"resource_tracker" not in err, err.decode() + out = out.decode() + err = err.decode() + assert p.returncode == 0, out + "\n\n" + err + assert "resource_tracker" not in err, err @with_numpy @@ -728,7 +710,7 @@ def test_memmapping_pool_for_large_arrays(factory, tmpdir): # Check that the tempfolder is empty assert os.listdir(tmpdir.strpath) == [] - # Build an array reducers that automaticaly dump large array content + # Build an array reducers that automatically dump large array content # to filesystem backed memmap instances to avoid memory explosion p = factory(3, max_nbytes=40, temp_folder=tmpdir.strpath, verbose=2) try: @@ -776,7 +758,18 @@ def test_memmapping_pool_for_large_arrays(factory, tmpdir): @with_numpy @with_multiprocessing -@parametrize("backend", ["multiprocessing", "loky"]) +@parametrize( + "backend", + [ + pytest.param( + "multiprocessing", + marks=pytest.mark.xfail( + reason='https://github.com/joblib/joblib/issues/1086' + ), + ), + "loky", + ] +) def test_child_raises_parent_exits_cleanly(backend): # When a task executed by a child process raises an error, the parent # process's backend is notified, and calls abort_everything. @@ -794,6 +787,8 @@ def test_child_raises_parent_exits_cleanly(backend): # - the resource_tracker does not emit any warnings. cmd = """if 1: import os + from pathlib import Path + from time import sleep import numpy as np from joblib import Parallel, delayed @@ -801,12 +796,11 @@ def test_child_raises_parent_exits_cleanly(backend): data = np.random.rand(1000) - def get_temp_folder(parallel_obj, backend): if "{b}" == "loky": - return p._backend._workers._temp_folder + return Path(p._backend._workers._temp_folder) else: - return p._backend._pool._temp_folder + return Path(p._backend._pool._temp_folder) if __name__ == "__main__": @@ -815,10 +809,27 @@ def get_temp_folder(parallel_obj, backend): temp_folder = get_temp_folder(p, "{b}") p(delayed(print_filename_and_raise)(data) for i in range(1)) - except ValueError: + except ValueError as e: # the temporary folder should be deleted by the end of this - # call - assert not os.path.exists(temp_folder) + # call but apparently on some file systems, this takes + # some time to be visible. + # + # We attempt to write into the temporary folder to test for + # its existence and we wait for a maximum of 10 seconds. + for i in range(100): + try: + with open(temp_folder / "some_file.txt", "w") as f: + f.write("some content") + except FileNotFoundError: + # temp_folder has been deleted, all is fine + break + + # ... 
else, wait a bit and try again + sleep(.1) + else: + raise AssertionError( + str(temp_folder) + " was not deleted" + ) from e """.format(b=backend) env = os.environ.copy() env['PYTHONPATH'] = os.path.dirname(__file__) @@ -828,7 +839,7 @@ def get_temp_folder(parallel_obj, backend): out, err = p.communicate() out, err = out.decode(), err.decode() filename = out.split('\n')[0] - assert p.returncode == 0, out + assert p.returncode == 0, err or out assert err == '' # no resource_tracker warnings. assert not os.path.exists(filename) @@ -951,7 +962,7 @@ def test_memmapping_pool_for_large_arrays_in_return(factory, tmpdir): """Check that large arrays are not copied in memory in return""" assert_array_equal = np.testing.assert_array_equal - # Build an array reducers that automaticaly dump large array content + # Build an array reducers that automatically dump large array content # but check that the returned datastructure are regular arrays to avoid # passing a memmap array pointing to a pool controlled temp folder that # might be confusing to the user diff --git a/joblib/test/test_memory.py b/joblib/test/test_memory.py index ad0ddf4ed..aaa7d1695 100644 --- a/joblib/test/test_memory.py +++ b/joblib/test/test_memory.py @@ -6,6 +6,7 @@ # Copyright (c) 2009 Gael Varoquaux # License: BSD Style, 3 clauses. +import functools import gc import shutil import os @@ -187,7 +188,7 @@ def f(x): if call_before_reducing: cached_f(3) # Two files were just created, func_code.py, and a folder - # containing the informations (inputs hash/ouptput) of + # containing the information (inputs hash/ouptput) of # cached_f(3) assert len(os.listdir(f_cache_directory / 'f')) == 2 @@ -364,7 +365,7 @@ def test_memory_eval(tmpdir): def count_and_append(x=[]): """ A function with a side effect in its arguments. - Return the lenght of its argument and append one element. + Return the length of its argument and append one element. """ len_x = len(x) x.append(None) @@ -488,6 +489,32 @@ def z(x, y=1): assert len(accumulator) == 1 +def test_memory_ignore_decorated(tmpdir): + " Test the ignore feature of memory on a decorated function " + memory = Memory(location=tmpdir.strpath, verbose=0) + accumulator = list() + + def decorate(f): + @functools.wraps(f) + def wrapped(*args, **kwargs): + return f(*args, **kwargs) + return wrapped + + @memory.cache(ignore=['y']) + @decorate + def z(x, y=1): + accumulator.append(1) + + assert z.ignore == ['y'] + + z(0, y=1) + assert len(accumulator) == 1 + z(0, y=1) + assert len(accumulator) == 1 + z(0, y=2) + assert len(accumulator) == 1 + + def test_memory_args_as_kwargs(tmpdir): """Non-regression test against 0.12.0 changes. @@ -537,10 +564,6 @@ def test_func_dir(tmpdir): assert location == path assert os.path.exists(path) assert memory.location == os.path.dirname(g.store_backend.location) - with warns(DeprecationWarning) as w: - assert memory.cachedir == g.store_backend.location - assert len(w) == 1 - assert "The 'cachedir' attribute has been deprecated" in str(w[-1].message) # Test that the code is stored. 
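The new ``test_memory_ignore_decorated`` above checks that ``ignore`` still works when the cached function is wrapped with ``functools.wraps``, and the removed assertions reflect that the deprecated ``cachedir`` parameter is gone in favour of ``location``. A short usage sketch; the cache path and the ``logged``/``compute`` names are made up:

import functools
from joblib import Memory

memory = Memory(location='/tmp/joblib_example_cache', verbose=0)

def logged(f):
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        return f(*args, **kwargs)
    return wrapper

@memory.cache(ignore=['verbose'])
@logged
def compute(x, verbose=0):
    return x ** 2

compute(3)             # computed and written to the cache
compute(3, verbose=1)  # cache hit: 'verbose' is not part of the cache key
memory.clear()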
# For the following test to be robust to previous execution, we clear @@ -582,6 +605,19 @@ def test_persistence(tmpdir): gp(1) +def test_check_call_in_cache(tmpdir): + for func in (MemorizedFunc(f, tmpdir.strpath), + Memory(location=tmpdir.strpath, verbose=0).cache(f)): + result = func.check_call_in_cache(2) + assert not result + assert isinstance(result, bool) + assert func(2) == 5 + result = func.check_call_in_cache(2) + assert result + assert isinstance(result, bool) + func.clear() + + def test_call_and_shelve(tmpdir): # Test MemorizedFunc outputting a reference to cache. @@ -942,11 +978,14 @@ def test_memory_reduce_size(tmpdir): def test_memory_clear(tmpdir): - memory, _, _ = _setup_toy_cache(tmpdir) + memory, _, g = _setup_toy_cache(tmpdir) memory.clear() assert os.listdir(memory.store_backend.location) == [] + # Check that the cache for functions hash is also reset. + assert not g._check_previous_func_code(stacklevel=4) + def fast_func_with_complex_output(): complex_obj = ['a' * 1000] * 1000 @@ -1049,31 +1088,8 @@ def func(arg): assert message in str(e.args) -def test_deprecated_cachedir_behaviour(tmpdir): - # verify the right deprecation warnings are raised when using cachedir - # option instead of new location parameter. - with warns(None) as w: - memory = Memory(cachedir=tmpdir.strpath, verbose=0) - assert memory.store_backend.location.startswith(tmpdir.strpath) - - assert len(w) == 1 - assert "The 'cachedir' parameter has been deprecated" in str(w[-1].message) - - with warns(None) as w: - memory = Memory() - assert memory.cachedir is None - - assert len(w) == 1 - assert "The 'cachedir' attribute has been deprecated" in str(w[-1].message) - - error_regex = """You set both "location='.+ and "cachedir='.+""" - with raises(ValueError, match=error_regex): - memory = Memory(location=tmpdir.strpath, cachedir=tmpdir.strpath, - verbose=0) - - class IncompleteStoreBackend(StoreBackendBase): - """This backend cannot be instanciated and should raise a TypeError.""" + """This backend cannot be instantiated and should raise a TypeError.""" pass @@ -1130,7 +1146,7 @@ def test_register_invalid_store_backends_object(): def test_memory_default_store_backend(): - # test an unknow backend falls back into a FileSystemStoreBackend + # test an unknown backend falls back into a FileSystemStoreBackend with raises(TypeError) as excinfo: Memory(location='/tmp/joblib', backend='unknown') excinfo.match(r"Unknown location*") @@ -1144,7 +1160,7 @@ class NonSupportedLocationClass: with warns(UserWarning) as warninfo: _store_backend_factory("local", location=unsupported_location) - expected_mesage = ("Instanciating a backend using a " + expected_mesage = ("Instantiating a backend using a " "NonSupportedLocationClass as a location is not " "supported by joblib") assert expected_mesage in str(warninfo[0].message) @@ -1152,7 +1168,7 @@ class NonSupportedLocationClass: def test_instanciate_incomplete_store_backend(): # Verify that registering an external incomplete store backend raises an - # exception when one tries to instanciate it. + # exception when one tries to instantiate it. 
backend_name = "isb" register_store_backend(backend_name, IncompleteStoreBackend) assert (backend_name, IncompleteStoreBackend) in _STORE_BACKENDS.items() @@ -1174,7 +1190,7 @@ def test_dummy_store_backend(): def test_instanciate_store_backend_with_pathlib_path(): - # Instanciate a FileSystemStoreBackend using a pathlib.Path object + # Instantiate a FileSystemStoreBackend using a pathlib.Path object path = pathlib.Path("some_folder") backend_obj = _store_backend_factory("local", path) assert backend_obj.location == "some_folder" @@ -1191,7 +1207,7 @@ def test_filesystem_store_backend_repr(tmpdir): assert str(backend) == repr_pattern.format(location=None) - # backend location is passed explicitely via the configure method (called + # backend location is passed explicitly via the configure method (called # by the internal _store_backend_factory function) backend.configure(tmpdir.strpath) diff --git a/joblib/test/test_missing_multiprocessing.py b/joblib/test/test_missing_multiprocessing.py new file mode 100644 index 000000000..251925ced --- /dev/null +++ b/joblib/test/test_missing_multiprocessing.py @@ -0,0 +1,32 @@ +""" +Pyodide and other single-threaded Python builds will be missing the +_multiprocessing module. Test that joblib still works in this environment. +""" + +import os +import subprocess +import sys + + +def test_missing_multiprocessing(tmp_path): + """ + Test that import joblib works even if _multiprocessing is missing. + + pytest has already imported everything from joblib. The most reasonable way + to test importing joblib with modified environment is to invoke a separate + Python process. This also ensures that we don't break other tests by + importing a bad `_multiprocessing` module. + """ + (tmp_path / "_multiprocessing.py").write_text( + 'raise ImportError("No _multiprocessing module!")' + ) + env = dict(os.environ) + # For subprocess, use current sys.path with our custom version of + # multiprocessing inserted. + env["PYTHONPATH"] = ":".join([str(tmp_path)] + sys.path) + subprocess.check_call( + [sys.executable, "-c", + "import joblib, math; " + "joblib.Parallel(n_jobs=1)(" + "joblib.delayed(math.sqrt)(i**2) for i in range(10))" + ], env=env) diff --git a/joblib/test/test_module.py b/joblib/test/test_module.py index 9c3b12b90..a2257a414 100644 --- a/joblib/test/test_module.py +++ b/joblib/test/test_module.py @@ -1,7 +1,7 @@ import sys import joblib -import pytest from joblib.testing import check_subprocess_call +from joblib.test.common import with_multiprocessing def test_version(): @@ -9,6 +9,7 @@ def test_version(): "There are no __version__ argument on the joblib module") +@with_multiprocessing def test_no_start_method_side_effect_on_import(): # check that importing joblib does not implicitly set the global # start_method for multiprocessing. 
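The new ``test_missing_multiprocessing.py`` above simulates a Pyodide-like interpreter by shadowing ``_multiprocessing`` on ``PYTHONPATH``. To see what actually got registered at import time, the module-level names introduced in the ``joblib/parallel.py`` hunk earlier can be inspected; they are internal details, so treat this only as a debugging aid:

import joblib.parallel as jp

# 'loky' and 'multiprocessing' are only registered when multiprocessing imports.
print(sorted(jp.BACKENDS))
# 'loky' on a regular CPython build, 'threading' on e.g. Pyodide.
print(jp.DEFAULT_BACKEND)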
@@ -22,6 +23,7 @@ def test_no_start_method_side_effect_on_import(): check_subprocess_call([sys.executable, '-c', code]) +@with_multiprocessing def test_no_semaphore_tracker_on_import(): # check that importing joblib does not implicitly spawn a resource tracker # or a semaphore tracker @@ -38,6 +40,7 @@ def test_no_semaphore_tracker_on_import(): check_subprocess_call([sys.executable, '-c', code]) +@with_multiprocessing def test_no_resource_tracker_on_import(): code = """if True: import joblib diff --git a/joblib/test/test_numpy_pickle.py b/joblib/test/test_numpy_pickle.py index db130b1f4..c9d1d5bdb 100644 --- a/joblib/test/test_numpy_pickle.py +++ b/joblib/test/test_numpy_pickle.py @@ -5,6 +5,7 @@ import random import re import io +import sys import warnings import gzip import zlib @@ -13,15 +14,18 @@ import socket from contextlib import closing import mmap +from pathlib import Path + try: import lzma except ImportError: lzma = None + import pytest from joblib.test.common import np, with_numpy, with_lz4, without_lz4 from joblib.test.common import with_memory_profiler, memory_used -from joblib.testing import parametrize, raises, SkipTest, warns +from joblib.testing import parametrize, raises, warns # numpy_pickle is not a drop-in replacement of pickle, as it takes # filenames instead of open files as arguments. @@ -30,6 +34,8 @@ from joblib.numpy_pickle_utils import _IO_BUFFER_SIZE from joblib.numpy_pickle_utils import _detect_compressor +from joblib.numpy_pickle_utils import _is_numpy_array_byte_order_mismatch +from joblib.numpy_pickle_utils import _ensure_native_byte_order from joblib.compressor import (_COMPRESSORS, _LZ4_PREFIX, CompressorWrapper, LZ4_NOT_INSTALLED_ERROR, BinaryZlibFile) @@ -146,21 +152,19 @@ def test_numpy_persistence(tmpdir, compress): # And finally, check that all the values are equal. np.testing.assert_array_equal(np.array(obj), np.array(obj_)) - # Now test with array subclasses - for obj in (np.matrix(np.zeros(10)), - np.memmap(filename + 'mmap', - mode='w+', shape=4, dtype=np.float)): - filenames = numpy_pickle.dump(obj, filename, compress=compress) - # All is cached in one file - assert len(filenames) == 1 + # Now test with an array subclass + obj = np.memmap(filename + 'mmap', mode='w+', shape=4, dtype=np.float64) + filenames = numpy_pickle.dump(obj, filename, compress=compress) + # All is cached in one file + assert len(filenames) == 1 - obj_ = numpy_pickle.load(filename) - if (type(obj) is not np.memmap and - hasattr(obj, '__array_prepare__')): - # We don't reconstruct memmaps - assert isinstance(obj_, type(obj)) + obj_ = numpy_pickle.load(filename) + if (type(obj) is not np.memmap and + hasattr(obj, '__array_prepare__')): + # We don't reconstruct memmaps + assert isinstance(obj_, type(obj)) - np.testing.assert_array_equal(obj_, obj) + np.testing.assert_array_equal(obj_, obj) # Test with an object containing multiple numpy arrays obj = ComplexTestObject() @@ -276,11 +280,13 @@ def test_compress_mmap_mode_warning(tmpdir): numpy_pickle.dump(a, this_filename, compress=1) with warns(UserWarning) as warninfo: numpy_pickle.load(this_filename, mmap_mode='r+') + warninfo = [w.message for w in warninfo] assert len(warninfo) == 1 - assert (str(warninfo[0].message) == - 'mmap_mode "%(mmap_mode)s" is not compatible with compressed ' - 'file %(filename)s. "%(mmap_mode)s" flag will be ignored.' % - {'filename': this_filename, 'mmap_mode': 'r+'}) + assert ( + str(warninfo[0]) == + 'mmap_mode "r+" is not compatible with compressed ' + f'file {this_filename}. 
"r+" flag will be ignored.' + ) @with_numpy @@ -292,7 +298,7 @@ def test_cache_size_warning(tmpdir, cache_size): a = rnd.random_sample((10, 2)) warnings.simplefilter("always") - with warns(None) as warninfo: + with warnings.catch_warnings(record=True) as warninfo: numpy_pickle.dump(a, filename, cache_size=cache_size) expected_nb_warnings = 1 if cache_size is not None else 0 assert len(warninfo) == expected_nb_warnings @@ -312,10 +318,8 @@ def test_memory_usage(tmpdir, compress): filename = tmpdir.join('test.pkl').strpath small_array = np.ones((10, 10)) big_array = np.ones(shape=100 * int(1e6), dtype=np.uint8) - small_matrix = np.matrix(small_array) - big_matrix = np.matrix(big_array) - for obj in (small_array, big_array, small_matrix, big_matrix): + for obj in (small_array, big_array): size = obj.nbytes / 1e6 obj_filename = filename + str(np.random.randint(0, 1000)) mem_used = memory_used(numpy_pickle.dump, @@ -341,11 +345,6 @@ def test_compressed_pickle_dump_and_load(tmpdir): np.arange(5, dtype=np.dtype('>f8')), np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'), np.arange(256, dtype=np.uint8).tobytes(), - # np.matrix is a subclass of np.ndarray, here we want - # to verify this type of object is correctly unpickled - # among versions. - np.matrix([0, 1, 2], dtype=np.dtype('i8')), u"C'est l'\xe9t\xe9 !"] fname = tmpdir.join('temp.pkl.gz').strpath @@ -355,13 +354,14 @@ def test_compressed_pickle_dump_and_load(tmpdir): result_list = numpy_pickle.load(fname) for result, expected in zip(result_list, expected_list): if isinstance(expected, np.ndarray): + expected = _ensure_native_byte_order(expected) assert result.dtype == expected.dtype np.testing.assert_equal(result, expected) else: assert result == expected -def _check_pickle(filename, expected_list): +def _check_pickle(filename, expected_list, mmap_mode=None): """Helper function to test joblib pickle content. Note: currently only pickles containing an iterable are supported @@ -376,24 +376,44 @@ def _check_pickle(filename, expected_list): py_version_used_for_writing, 4) if pickle_reading_protocol >= pickle_writing_protocol: try: - with warns(None) as warninfo: + with warnings.catch_warnings(record=True) as warninfo: warnings.simplefilter('always') warnings.filterwarnings( 'ignore', module='numpy', message='The compiler package is deprecated') - result_list = numpy_pickle.load(filename) + result_list = numpy_pickle.load(filename, mmap_mode=mmap_mode) filename_base = os.path.basename(filename) - expected_nb_warnings = 1 if ("_0.9" in filename_base or - "_0.8.4" in filename_base) else 0 + expected_nb_deprecation_warnings = 1 if ( + "_0.9" in filename_base or "_0.8.4" in filename_base) else 0 + + expected_nb_user_warnings = 3 if ( + re.search("_0.1.+.pkl$", filename_base) and + mmap_mode is not None) else 0 + expected_nb_warnings = \ + expected_nb_deprecation_warnings + expected_nb_user_warnings assert len(warninfo) == expected_nb_warnings - for w in warninfo: - assert w.category == DeprecationWarning + + deprecation_warnings = [ + w for w in warninfo if issubclass( + w.category, DeprecationWarning)] + user_warnings = [ + w for w in warninfo if issubclass( + w.category, UserWarning)] + for w in deprecation_warnings: assert (str(w.message) == "The file '{0}' has been generated with a joblib " "version less than 0.10. 
Please regenerate this " "pickle file.".format(filename)) + + for w in user_warnings: + escaped_filename = re.escape(filename) + assert re.search( + f"memmapped.+{escaped_filename}.+segmentation fault", + str(w.message)) + for result, expected in zip(result_list, expected_list): if isinstance(expected, np.ndarray): + expected = _ensure_native_byte_order(expected) assert result.dtype == expected.dtype np.testing.assert_equal(result, expected) else: @@ -457,6 +477,68 @@ def test_joblib_pickle_across_python_versions(): _check_pickle(fname, expected_list) +@with_numpy +def test_joblib_pickle_across_python_versions_with_mmap(): + expected_list = [np.arange(5, dtype=np.dtype('i8'), ('', '>f8')]), + np.arange(3, dtype=np.dtype('>i8')), + np.arange(3, dtype=np.dtype('>f8'))] + + # Verify the byteorder mismatch is correctly detected. + for array in be_arrays: + if sys.byteorder == 'big': + assert not _is_numpy_array_byte_order_mismatch(array) + else: + assert _is_numpy_array_byte_order_mismatch(array) + converted = _ensure_native_byte_order(array) + if converted.dtype.fields: + for f in converted.dtype.fields.values(): + f[0].byteorder == '=' + else: + assert converted.dtype.byteorder == "=" + + # List of numpy arrays with little endian byteorder. + le_arrays = [np.array([(1, 2.0), (3, 4.0)], + dtype=[('', ' 0: - return 'backed parallel loops cannot' in records[0].message.args[0] + # with threading, we might see more that one warninfo + if warninfo: + return ( + len(warninfo) == 1 and + 'backed parallel loops cannot' in warninfo[0].args[0] + ) return False else: - assert len(records) == 0 + assert not warninfo return True @with_multiprocessing @parametrize('parent_backend,child_backend,expected', [ - ('loky', 'multiprocessing', True), ('loky', 'loky', False), + ('loky', 'multiprocessing', True), + ('loky', 'loky', False), ('multiprocessing', 'multiprocessing', True), ('multiprocessing', 'loky', True), ('threading', 'multiprocessing', True), @@ -241,11 +253,11 @@ def test_background_thread_parallelism(backend): is_run_parallel = [False] def background_thread(is_run_parallel): - with warns(None) as records: + with warnings.catch_warnings(record=True) as warninfo: Parallel(n_jobs=2)( delayed(sleep)(.1) for _ in range(4)) - print(len(records)) - is_run_parallel[0] = len(records) == 0 + print(len(warninfo)) + is_run_parallel[0] = len(warninfo) == 0 t = threading.Thread(target=background_thread, args=(is_run_parallel,)) t.start() @@ -269,6 +281,7 @@ def raise_exception(backend): raise ValueError +@with_multiprocessing def test_nested_loop_with_exception_with_loky(): with raises(ValueError): with Parallel(n_jobs=2, backend="loky") as parallel: @@ -568,8 +581,14 @@ def effective_n_jobs(self, n_jobs=1): def test_invalid_backend(): - with raises(ValueError): + with raises(ValueError) as excinfo: Parallel(backend='unit-testing') + assert "Invalid backend:" in str(excinfo.value) + + with raises(ValueError) as excinfo: + with parallel_backend('unit-testing'): + pass + assert "Invalid backend:" in str(excinfo.value) @parametrize('backend', ALL_VALID_BACKENDS) @@ -600,6 +619,17 @@ def test_overwrite_default_backend(): assert _active_backend_type() == DefaultBackend +@skipif(mp is not None, reason="Only without multiprocessing") +def test_backend_no_multiprocessing(): + with warns(UserWarning, + match="joblib backend '.*' is not available on.*"): + Parallel(backend='loky')(delayed(square)(i) for i in range(3)) + + # The below should now work without problems + with parallel_backend('loky'): + 
Parallel()(delayed(square)(i) for i in range(3)) + + def check_backend_context_manager(backend_name): with parallel_backend(backend_name, n_jobs=3): active_backend, active_n_jobs = parallel.get_active_backend() @@ -1001,6 +1031,7 @@ def test_parallel_with_unpicklable_functions_in_args( INTERACTIVE_DEFINED_FUNCTION_AND_CLASS_SCRIPT_CONTENT = """\ import sys +import faulthandler # Make sure that joblib is importable in the subprocess launching this # script. This is needed in case we run the tests from the joblib root # folder without having installed joblib @@ -1025,6 +1056,9 @@ def square(x, ignored=None, ignored2=None): # Here, we do not need the `if __name__ == "__main__":` safeguard when # using the default `loky` backend (even on Windows). +# To make debugging easier +faulthandler.dump_traceback_later(30, exit=True) + # The following baroque function call is meant to check that joblib # introspection rightfully uses cloudpickle instead of the (faster) pickle # module of the standard library when necessary. In particular cloudpickle is @@ -1047,9 +1081,11 @@ def test_parallel_with_interactively_defined_functions_default_backend(tmpdir): # filesystem script. script = tmpdir.join('joblib_interactively_defined_function.py') script.write(INTERACTIVE_DEFINED_FUNCTION_AND_CLASS_SCRIPT_CONTENT) - check_subprocess_call([sys.executable, script.strpath], - stdout_regex=r'\[0, 1, 4, 9, 16\]', - timeout=5) + check_subprocess_call( + [sys.executable, script.strpath], + stdout_regex=r'\[0, 1, 4, 9, 16\]', + timeout=None, # rely on faulthandler to kill the process + ) INTERACTIVELY_DEFINED_SUBCLASS_WITH_METHOD_SCRIPT_CONTENT = """\ @@ -1146,7 +1182,7 @@ def test_memmap_with_big_offset(tmpdir): def test_warning_about_timeout_not_supported_by_backend(): - with warns(None) as warninfo: + with warnings.catch_warnings(record=True) as warninfo: Parallel(timeout=1)(delayed(square)(i) for i in range(50)) assert len(warninfo) == 1 w = warninfo[0] @@ -1207,7 +1243,10 @@ def test_memmapping_leaks(backend, tmpdir): raise AssertionError('temporary directory of Parallel was not removed') -@parametrize('backend', [None, 'loky', 'threading']) +@parametrize('backend', + ([None, 'threading'] if mp is None + else [None, 'loky', 'threading']) + ) def test_lambda_expression(backend): # cloudpickle is used to pickle delayed callables results = Parallel(n_jobs=2, backend=backend)( @@ -1237,6 +1276,7 @@ def test_backend_batch_statistics_reset(backend): p._backend._DEFAULT_SMOOTHED_BATCH_DURATION) +@with_multiprocessing def test_backend_hinting_and_constraints(): for n_jobs in [1, 2, -1]: assert type(Parallel(n_jobs=n_jobs)._backend) == LokyBackend @@ -1347,12 +1387,13 @@ def test_invalid_backend_hinting_and_constraints(): # requiring shared memory semantics. Parallel(prefer='processes', require='sharedmem') - # It is inconsistent to ask explictly for a process-based parallelism - # while requiring shared memory semantics. - with raises(ValueError): - Parallel(backend='loky', require='sharedmem') - with raises(ValueError): - Parallel(backend='multiprocessing', require='sharedmem') + if mp is not None: + # It is inconsistent to ask explicitly for a process-based + # parallelism while requiring shared memory semantics. 
+ with raises(ValueError): + Parallel(backend='loky', require='sharedmem') + with raises(ValueError): + Parallel(backend='multiprocessing', require='sharedmem') def test_global_parallel_backend(): @@ -1437,7 +1478,8 @@ def _recursive_parallel(nesting_limit=None): return Parallel()(delayed(_recursive_parallel)() for i in range(2)) -@parametrize('backend', ['loky', 'threading']) +@parametrize('backend', + (['threading'] if mp is None else ['loky', 'threading'])) def test_thread_bomb_mitigation(backend): # Test that recursive parallelism raises a recursion rather than # saturating the operating system resources by creating a unbounded number @@ -1446,13 +1488,18 @@ def test_thread_bomb_mitigation(backend): with raises(BaseException) as excinfo: _recursive_parallel() exc = excinfo.value - if backend == "loky" and isinstance(exc, TerminatedWorkerError): - # The recursion exception can itself cause an error when pickling it to - # be send back to the parent process. In this case the worker crashes - # but the original traceback is still printed on stderr. This could be - # improved but does not seem simple to do and this is is not critical - # for users (as long as there is no process or thread bomb happening). - pytest.xfail("Loky worker crash when serializing RecursionError") + if backend == "loky": + # Local import because loky may not be importable for lack of + # multiprocessing + from joblib.externals.loky.process_executor import TerminatedWorkerError # noqa + if isinstance(exc, TerminatedWorkerError): + # The recursion exception can itself cause an error when + # pickling it to be send back to the parent process. In this + # case the worker crashes but the original traceback is still + # printed on stderr. This could be improved but does not seem + # simple to do and this is is not critical for users (as long + # as there is no process or thread bomb happening). + pytest.xfail("Loky worker crash when serializing RecursionError") else: assert isinstance(exc, RecursionError) @@ -1466,7 +1513,7 @@ def _run_parallel_sum(): return env_vars, parallel_sum(100) -@parametrize("backend", [None, 'loky']) +@parametrize("backend", ([None, 'loky'] if mp is not None else [None])) @skipif(parallel_sum is None, reason="Need OpenMP helper compiled") def test_parallel_thread_limit(backend): results = Parallel(n_jobs=2, backend=backend)( @@ -1563,7 +1610,7 @@ def _parent_max_num_threads_for(child_module, parent_info): def check_child_num_threads(workers_info, parent_info, num_threads): # Check that the number of threads reported in workers_info is consistent - # with the expectation. We need to be carefull to handle the cases where + # with the expectation. We need to be careful to handle the cases where # the requested number of threads is below max_num_thread for the library. 
for child_threadpool_info in workers_info: for child_module in child_threadpool_info: diff --git a/joblib/test/test_utils.py b/joblib/test/test_utils.py new file mode 100644 index 000000000..4999a212c --- /dev/null +++ b/joblib/test/test_utils.py @@ -0,0 +1,27 @@ +import pytest + +from joblib._utils import eval_expr + + +@pytest.mark.parametrize( + "expr", + ["exec('import os')", "print(1)", "import os", "1+1; import os", "1^1"], +) +def test_eval_expr_invalid(expr): + with pytest.raises( + ValueError, match="is not a valid or supported arithmetic" + ): + eval_expr(expr) + + +@pytest.mark.parametrize( + "expr, result", + [ + ("2*6", 12), + ("2**6", 64), + ("1 + 2*3**(4) / (6 + -7)", -161.0), + ("(20 // 3) % 5", 1), + ], +) +def test_eval_expr_valid(expr, result): + assert eval_expr(expr) == result diff --git a/joblib/testing.py b/joblib/testing.py index 28f79311c..f8939f056 100644 --- a/joblib/testing.py +++ b/joblib/testing.py @@ -50,9 +50,10 @@ def kill_process(): warnings.warn("Timeout running {}".format(cmd)) proc.kill() - timer = threading.Timer(timeout, kill_process) try: - timer.start() + if timeout is not None: + timer = threading.Timer(timeout, kill_process) + timer.start() stdout, stderr = proc.communicate() stdout, stderr = stdout.decode(), stderr.decode() if proc.returncode != 0: @@ -74,4 +75,5 @@ def kill_process(): stderr_regex, stderr)) finally: - timer.cancel() + if timeout is not None: + timer.cancel() diff --git a/setup.cfg b/setup.cfg index 6a31db944..e3dfb343e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,14 +4,13 @@ release = egg_info -RDb '' upload = upload upload_docs --upload-dir doc/_build/html [bdist_rpm] -doc-files = doc +doc_files = doc [tool:pytest] doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS addopts = --doctest-glob="doc/*.rst" --doctest-modules - -p no:warnings --ignore joblib/externals testpaths = joblib diff --git a/setup.py b/setup.py index d2794c2d5..d9c642f34 100755 --- a/setup.py +++ b/setup.py @@ -15,6 +15,9 @@ author='Gael Varoquaux', author_email='gael.varoquaux@normalesup.org', url='https://joblib.readthedocs.io', + project_urls={ + 'Source': 'https://github.com/joblib/joblib', + }, license='BSD', description="Lightweight pipelining with Python functions", long_description=long_description, @@ -28,10 +31,10 @@ 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', 'Topic :: Scientific/Engineering', 'Topic :: Utilities', 'Topic :: Software Development :: Libraries', @@ -54,5 +57,5 @@ 'joblib.externals', 'joblib.externals.cloudpickle', 'joblib.externals.loky', 'joblib.externals.loky.backend', ], - python_requires='>=3.6', + python_requires='>=3.7', )
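The new ``joblib/test/test_utils.py`` above exercises the ``eval_expr`` helper that replaces the old ``eval(pre_dispatch)`` call in ``joblib/parallel.py``. Its implementation lives in ``joblib/_utils.py`` and is not part of this diff, so the following is only an illustrative sketch of how such a restricted arithmetic evaluator can be built on the ``ast`` module (Python 3.8+ node types); it is not joblib's actual code:

import ast
import operator

_ALLOWED_OPS = {
    ast.Add: operator.add, ast.Sub: operator.sub, ast.Mult: operator.mul,
    ast.Div: operator.truediv, ast.FloorDiv: operator.floordiv,
    ast.Mod: operator.mod, ast.Pow: operator.pow, ast.USub: operator.neg,
}

def eval_expr_sketch(expr):
    """Evaluate a purely arithmetic expression such as '2*4' (joblib first
    substitutes the numeric value of n_jobs into the string, as shown in the
    parallel.py hunk above)."""
    try:
        return _eval_node(ast.parse(expr, mode="eval").body)
    except (KeyError, SyntaxError, TypeError) as e:
        raise ValueError(
            f"{expr!r} is not a valid or supported arithmetic expression."
        ) from e

def _eval_node(node):
    if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
        return node.value
    if isinstance(node, ast.BinOp):
        return _ALLOWED_OPS[type(node.op)](
            _eval_node(node.left), _eval_node(node.right))
    if isinstance(node, ast.UnaryOp):
        return _ALLOWED_OPS[type(node.op)](_eval_node(node.operand))
    # Anything else (names, calls, imports, ...) is rejected.
    raise TypeError(node)

assert eval_expr_sketch("2*6") == 12
assert eval_expr_sketch("1 + 2*3**(4) / (6 + -7)") == -161.0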