Skip to content

Commit

Permalink
Add ContiguousDisjointSet datastructure
Browse files Browse the repository at this point in the history
  • Loading branch information
clonker committed Sep 9, 2024
1 parent 09e9aa6 commit a13a86f
Show file tree
Hide file tree
Showing 5 changed files with 281 additions and 0 deletions.
2 changes: 2 additions & 0 deletions libsolutil/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 8,8 @@ set(sources
CommonIO.cpp
CommonIO.h
cxx20.h
DisjointSet.cpp
DisjointSet.h
DominatorFinder.h
Exceptions.cpp
Exceptions.h
Expand Down
109 changes: 109 additions & 0 deletions libsolutil/DisjointSet.cpp
Original file line number Diff line number Diff line change
@@ -0,0 1,109 @@
/*
This file is part of solidity.

solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
// SPDX-License-Identifier: GPL-3.0

#include <libsolutil/DisjointSet.h>

#include <liblangutil/Exceptions.h>

#include <numeric>

using namespace solidity::util;

ContiguousDisjointSet::ContiguousDisjointSet(size_t const _numNodes):
m_parents(_numNodes),
m_neighbors(_numNodes),
m_sizes(_numNodes, static_cast<value_type>(1)),
m_numSets(_numNodes)
{
// each is their own neighbor and parent
std::iota(m_parents.begin(), m_parents.end(), 0);
std::iota(m_neighbors.begin(), m_neighbors.end(), 0);
}

size_t ContiguousDisjointSet::numSets() const { return m_numSets; }

ContiguousDisjointSet::value_type ContiguousDisjointSet::find(value_type const _element) const
{
solAssert(_element < m_parents.size());
// path halving
value_type rootElement = _element;
while (rootElement != m_parents[rootElement])
{
m_parents[rootElement] = m_parents[m_parents[rootElement]];
rootElement = m_parents[rootElement];
}
return rootElement;
}

void ContiguousDisjointSet::merge(value_type const _x, value_type const _y, bool const _mergeBySize)
{
auto xRoot = find(_x);
auto yRoot = find(_y);

if (xRoot == yRoot)
return; // we're done, nothing to merge here

// if merge by size: merge smaller (yRoot) into larger (xRoot) subset;
// otherwise if _x is the representative of subset(_x), it will stay representative
if (_mergeBySize && m_sizes[xRoot] < m_sizes[yRoot])
std::swap(xRoot, yRoot);

m_parents[yRoot] = xRoot;
m_sizes[xRoot] = m_sizes[yRoot];
std::swap(m_neighbors[xRoot], m_neighbors[yRoot]);
--m_numSets;
}

bool ContiguousDisjointSet::sameSubset(value_type const _x, value_type const _y) const
{
return find(_x) == find(_y);
}

ContiguousDisjointSet::size_type ContiguousDisjointSet::sizeOfSubset(value_type const _x) const
{
return m_sizes[find(_x)];
}

std::set<ContiguousDisjointSet::value_type> ContiguousDisjointSet::subset(value_type const _x) const
{
solAssert(_x < m_parents.size());
std::set<value_type> result{_x};
value_type neighbor = m_neighbors[_x];
while (neighbor != _x)
{
result.insert(neighbor);
neighbor = m_neighbors[neighbor];
}
return result;
}

std::vector<std::set<ContiguousDisjointSet::value_type>> ContiguousDisjointSet::subsets() const
{
std::vector<std::set<value_type>> result;
std::vector<std::uint8_t> visited(m_parents.size(), false);
for (value_type x = 0; x < m_parents.size(); x)
{
auto xRoot = find(x);
if (!visited[xRoot])
{
result.push_back(subset(xRoot));
visited[xRoot] = true;
}
}
return result;
}
66 changes: 66 additions & 0 deletions libsolutil/DisjointSet.h
Original file line number Diff line number Diff line change
@@ -0,0 1,66 @@
/*
This file is part of solidity.

solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
// SPDX-License-Identifier: GPL-3.0

#pragma once

#include <cstddef>
#include <set>
#include <vector>

namespace solidity::util
{

/// Implementation of the Disjoint set data structure [1], also called union-set, with path-halving [2].
/// This implementation assumes that each set element is identified with an element in a iota range (hence contiguous).
///
/// [1] https://en.wikipedia.org/wiki/Disjoint-set_data_structure
/// [2] Tarjan, Robert E., and Jan Van Leeuwen. "Worst-case analysis of set union algorithms."
/// Journal of the ACM (JACM) 31.2 (1984): 245-281.
class ContiguousDisjointSet
{
public:
using size_type = size_t;
using value_type = size_t;

/// Constructs a new disjoint set datastructure with `_numNodes` elements and each element in its own individual set
explicit ContiguousDisjointSet(size_t _numNodes);

size_type numSets() const;

/// finds one representative for a set that contains `_element`
value_type find(value_type _element) const;

/// joins the two sets containing `_x` and `_y`, returns true if the sets were disjoint, otherwise false
void merge(value_type _x, value_type _y, bool _mergeBySize=true);

bool sameSubset(value_type _x, value_type _y) const;

size_type sizeOfSubset(value_type _x) const;

std::set<value_type> subset(value_type _x) const;

std::vector<std::set<value_type>> subsets() const;

private:
std::vector<value_type> mutable m_parents; // mutable for path compression, doesn't change semantic state
std::vector<value_type> m_neighbors;
std::vector<value_type> m_sizes;
size_type m_numSets;
};

}
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 31,7 @@ set(libsolutil_sources
libsolutil/Checksum.cpp
libsolutil/CommonData.cpp
libsolutil/CommonIO.cpp
libsolutil/DisjointSet.cpp
libsolutil/DominatorFinderTest.cpp
libsolutil/FixedHash.cpp
libsolutil/FunctionSelector.cpp
Expand Down
103 changes: 103 additions & 0 deletions test/libsolutil/DisjointSet.cpp
Original file line number Diff line number Diff line change
@@ -0,0 1,103 @@
/*
This file is part of solidity.

solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Unit tests for FixedHash.
*/

#include <libsolutil/DisjointSet.h>

#include <boost/test/unit_test.hpp>

namespace solidity::util::test
{

BOOST_AUTO_TEST_SUITE(DisjointSetTest)

BOOST_AUTO_TEST_CASE(full_union)
{
ContiguousDisjointSet ds(10);
for (size_t i = 1; i < 10; i)
{
BOOST_CHECK(!ds.sameSubset(0, i));
ds.merge(0, i);
BOOST_CHECK(ds.sameSubset(0, i));
}
BOOST_CHECK_EQUAL(ds.numSets(), 1);
auto const& subsets = ds.subsets();
BOOST_CHECK_EQUAL(subsets.size(), 1);
std::set<size_t> fullSet{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
BOOST_CHECK_EQUAL_COLLECTIONS(subsets[0].begin(), subsets[0].end(), fullSet.begin(), fullSet.end());

for (size_t i = 1; i < 10; i) BOOST_CHECK_EQUAL(ds.find(0), ds.find(i));
}

BOOST_AUTO_TEST_CASE(pairs)
{
ContiguousDisjointSet ds(10);
BOOST_CHECK_EQUAL(ds.numSets(), 10);
BOOST_CHECK_EQUAL(ds.subsets().size(), 10);

auto const checkPair = [&](size_t expectedNumSubsets, size_t x, size_t y)
{
BOOST_CHECK_EQUAL(ds.numSets(), expectedNumSubsets);
BOOST_CHECK_EQUAL(ds.subsets().size(), expectedNumSubsets);
BOOST_CHECK(ds.sameSubset(x, y));
auto const subset = ds.subset(x);
std::set<size_t> subsetRef{x, y};
BOOST_CHECK_EQUAL_COLLECTIONS(subset.begin(), subset.end(), subsetRef.begin(), subsetRef.end());
};

ds.merge(5, 3);
checkPair(9, 5, 3);

ds.merge(1, 6);
checkPair(8, 1, 6);

ds.merge(0, 2);
checkPair(7, 0, 2);

ds.merge(1, 5);
BOOST_CHECK_EQUAL(ds.sizeOfSubset(3), 4);

ds.merge(1, 0);

// now we should have a subset with {0, 1, 2, 3, 5, 6}
std::set<size_t> subsetRef{0, 1, 2, 3, 5, 6};
BOOST_CHECK_EQUAL(ds.sizeOfSubset(6), subsetRef.size());
auto const subset = ds.subset(2);
BOOST_CHECK_EQUAL_COLLECTIONS(subset.begin(), subset.end(), subsetRef.begin(), subsetRef.end());
}

BOOST_AUTO_TEST_CASE(merge_with_fixed_representative)
{
ContiguousDisjointSet ds(10);
ds.merge(5, 3, false);
BOOST_CHECK_EQUAL(ds.find(5), 5);
ds.merge(1, 2);
ds.merge(7, 8);
ds.merge(0, 9);
ds.merge(5, 1, false);
BOOST_CHECK_EQUAL(ds.find(5), 5);
ds.merge(5, 0, false);
BOOST_CHECK_EQUAL(ds.find(5), 5);
ds.merge(5, 7, false);
BOOST_CHECK_EQUAL(ds.find(5), 5);
}

BOOST_AUTO_TEST_SUITE_END()

}

0 comments on commit a13a86f

Please sign in to comment.