Improve codegen for deque.

This patch rewrites a few loops in deque and split_buffer to better
optimize the codegen. For constructors like
`deque<unsigned char> d(500000, 0);` this patch results in a 2x speedup.

The patch improves the codegen  in roughly three ways:

1. Changes do { ... } while (...) loops into more typical for loops.
  The optimizer can reason about normal looking loops better.

2. Split the iteration over a range into (A) iteration over the blocks,
then (B) iteration within the block. This nested structure helps LLVM
lower the inner loop to `memset`.

3. Do fewer things each iteration. Some of these loops were incrementing
  or changing 4-5 variables every loop (in addition to the
  construction). Previously most loops would increment the end pointer,
  the size, and decrement the count of remaining items to construct.
  Now we only increment a single pointer for most iterations.

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@368547 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eric Fiselier
2019-08-12 07:51:05 +00:00
parent 60739846ba
commit 6cebf7c36f
5 changed files with 210 additions and 28 deletions

View File

@@ -161,6 +161,19 @@ private:
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(__split_buffer&, false_type) _NOEXCEPT
{}
struct _ConstructTransaction {
explicit _ConstructTransaction(pointer* __p, size_type __n) _NOEXCEPT
: __pos_(*__p), __end_(*__p + __n), __dest_(__p) {
}
~_ConstructTransaction() {
*__dest_ = __pos_;
}
pointer __pos_;
const pointer __end_;
private:
pointer *__dest_;
};
};
template <class _Tp, class _Allocator>
@@ -197,13 +210,10 @@ template <class _Tp, class _Allocator>
void
__split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n)
{
__alloc_rr& __a = this->__alloc();
do
{
__alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_));
++this->__end_;
--__n;
} while (__n > 0);
_ConstructTransaction __tx(&this->__end_, __n);
for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) {
__alloc_traits::construct(this->__alloc(), _VSTD::__to_raw_pointer(__tx.__pos_));
}
}
// Copy constructs __n objects starting at __end_ from __x
@@ -216,13 +226,11 @@ template <class _Tp, class _Allocator>
void
__split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
{
__alloc_rr& __a = this->__alloc();
do
{
__alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x);
++this->__end_;
--__n;
} while (__n > 0);
_ConstructTransaction __tx(&this->__end_, __n);
for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) {
__alloc_traits::construct(this->__alloc(),
_VSTD::__to_raw_pointer(__tx.__pos_), __x);
}
}
template <class _Tp, class _Allocator>
@@ -262,11 +270,10 @@ typename enable_if
>::type
__split_buffer<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last)
{
__alloc_rr& __a = this->__alloc();
for (; __first != __last; ++__first)
{
__alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), *__first);
++this->__end_;
_ConstructTransaction __tx(&this->__end_, std::distance(__first, __last));
for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_, ++__first) {
__alloc_traits::construct(this->__alloc(),
_VSTD::__to_raw_pointer(__tx.__pos_), *__first);
}
}