[libcxx] Speeding up partition_point/lower_bound/upper_bound

This is a re-application of r345525, which had been reverted by fear of
a regression.

Reviewed as https://reviews.llvm.org/D53994.
Thanks to Denis Yaroshevskiy for the patch.

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@349358 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Louis Dionne
2018-12-17 16:04:39 +00:00
parent a443a0013d
commit 8f7fa38fb9
3 changed files with 210 additions and 4 deletions

View File

@@ -0,0 +1,124 @@
#include <array>
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "CartesianBenchmarks.hpp"
#include "GenerateInput.hpp"
namespace {
template <typename I, typename N>
std::array<I, 10> every_10th_percentile_N(I first, N n) {
N step = n / 10;
std::array<I, 10> res;
for (size_t i = 0; i < 10; ++i) {
res[i] = first;
std::advance(first, step);
}
return res;
}
template <class IntT>
struct TestIntBase {
static std::vector<IntT> generateInput(size_t size) {
std::vector<IntT> Res(size);
std::generate(Res.begin(), Res.end(),
[] { return getRandomInteger<IntT>(); });
return Res;
}
};
struct TestInt32 : TestIntBase<std::int32_t> {
static constexpr const char* Name = "TestInt32";
};
struct TestInt64 : TestIntBase<std::int64_t> {
static constexpr const char* Name = "TestInt64";
};
struct TestUint32 : TestIntBase<std::uint32_t> {
static constexpr const char* Name = "TestUint32";
};
struct TestMediumString {
static constexpr const char* Name = "TestMediumString";
static constexpr size_t StringSize = 32;
static std::vector<std::string> generateInput(size_t size) {
std::vector<std::string> Res(size);
std::generate(Res.begin(), Res.end(), [] { return getRandomString(StringSize); });
return Res;
}
};
using AllTestTypes = std::tuple<TestInt32, TestInt64, TestUint32, TestMediumString>;
struct LowerBoundAlg {
template <class I, class V>
I operator()(I first, I last, const V& value) const {
return std::lower_bound(first, last, value);
}
static constexpr const char* Name = "LowerBoundAlg";
};
struct UpperBoundAlg {
template <class I, class V>
I operator()(I first, I last, const V& value) const {
return std::upper_bound(first, last, value);
}
static constexpr const char* Name = "UpperBoundAlg";
};
struct EqualRangeAlg {
template <class I, class V>
std::pair<I, I> operator()(I first, I last, const V& value) const {
return std::equal_range(first, last, value);
}
static constexpr const char* Name = "EqualRangeAlg";
};
using AllAlgs = std::tuple<LowerBoundAlg, UpperBoundAlg, EqualRangeAlg>;
template <class Alg, class TestType>
struct PartitionPointBench {
size_t Quantity;
std::string name() const {
return std::string("PartitionPointBench_") + Alg::Name + "_" +
TestType::Name + '/' + std::to_string(Quantity);
}
void run(benchmark::State& state) const {
auto Data = TestType::generateInput(Quantity);
std::sort(Data.begin(), Data.end());
auto Every10Percentile = every_10th_percentile_N(Data.begin(), Data.size());
for (auto _ : state) {
for (auto Test : Every10Percentile)
benchmark::DoNotOptimize(Alg{}(Data.begin(), Data.end(), *Test));
}
}
};
} // namespace
int main(int argc, char** argv) {
benchmark::Initialize(&argc, argv);
if (benchmark::ReportUnrecognizedArguments(argc, argv))
return 1;
const std::vector<size_t> Quantities = {1 << 8, 1 << 10, 1 << 20};
makeCartesianProductBenchmark<PartitionPointBench, AllAlgs, AllTestTypes>(
Quantities);
benchmark::RunSpecifiedBenchmarks();
}

View File

@@ -750,6 +750,32 @@ public:
bool operator()(const _T1& __x, const _T2& __y) {return __p_(__y, __x);}
};
// Perform division by two quickly for positive integers (llvm.org/PR39129)
template <typename _Integral>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
typename enable_if
<
is_integral<_Integral>::value,
_Integral
>::type
__half_positive(_Integral __value)
{
return static_cast<_Integral>(static_cast<typename make_unsigned<_Integral>::type>(__value) / 2);
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
typename enable_if
<
!is_integral<_Tp>::value,
_Tp
>::type
__half_positive(_Tp __value)
{
return __value / 2;
}
#ifdef _LIBCPP_DEBUG
template <class _Compare>
@@ -3202,7 +3228,7 @@ partition_point(_ForwardIterator __first, _ForwardIterator __last, _Predicate __
difference_type __len = _VSTD::distance(__first, __last);
while (__len != 0)
{
difference_type __l2 = __len / 2;
difference_type __l2 = _VSTD::__half_positive(__len);
_ForwardIterator __m = __first;
_VSTD::advance(__m, __l2);
if (__pred(*__m))
@@ -4070,7 +4096,7 @@ __lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va
difference_type __len = _VSTD::distance(__first, __last);
while (__len != 0)
{
difference_type __l2 = __len / 2;
difference_type __l2 = _VSTD::__half_positive(__len);
_ForwardIterator __m = __first;
_VSTD::advance(__m, __l2);
if (__comp(*__m, __value_))
@@ -4112,7 +4138,7 @@ __upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va
difference_type __len = _VSTD::distance(__first, __last);
while (__len != 0)
{
difference_type __l2 = __len / 2;
difference_type __l2 = _VSTD::__half_positive(__len);
_ForwardIterator __m = __first;
_VSTD::advance(__m, __l2);
if (__comp(__value_, *__m))
@@ -4154,7 +4180,7 @@ __equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va
difference_type __len = _VSTD::distance(__first, __last);
while (__len != 0)
{
difference_type __l2 = __len / 2;
difference_type __l2 = _VSTD::__half_positive(__len);
_ForwardIterator __m = __first;
_VSTD::advance(__m, __l2);
if (__comp(*__m, __value_))

View File

@@ -0,0 +1,56 @@
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// <algorithm>
// __half_positive divides an integer number by 2 as unsigned number for known types.
// It can be an important optimization for lower bound, for example.
#include <algorithm>
#include <cassert>
#include <limits>
#include <type_traits>
#include "test_macros.h"
#include "user_defined_integral.hpp"
namespace {
template <class IntType, class UnderlyingType = IntType>
TEST_CONSTEXPR bool test(IntType max_v = IntType(std::numeric_limits<UnderlyingType>::max())) {
return std::__half_positive(max_v) == max_v / 2;
}
} // namespace
int main()
{
{
assert(test<char>());
assert(test<int>());
assert(test<long>());
assert((test<UserDefinedIntegral<int>, int>()));
assert(test<size_t>());
#if !defined(_LIBCPP_HAS_NO_INT128)
assert(test<__int128_t>());
#endif // !defined(_LIBCPP_HAS_NO_INT128)
}
#if TEST_STD_VER >= 11
{
static_assert(test<char>(), "");
static_assert(test<int>(), "");
static_assert(test<long>(), "");
static_assert(test<size_t>(), "");
#if !defined(_LIBCPP_HAS_NO_INT128)
static_assert(test<__int128_t>(), "");
#endif // !defined(_LIBCPP_HAS_NO_INT128)
}
#endif // TEST_STD_VER >= 11
}