Skip to content

Commit 4e37b20

Browse files
Introduce support for move-only types in InlinedVector, and use move during resize.
Note: the previous code in Move seemed to use std::move, but without effect, since the argument was a const&. This cl fixes the problem, shown by my test with a MoveOnly type. From: Benchmark Time(ns) CPU(ns) Iterations --------------------------------------------------------------- BM_InlinedVectorFillString/0 2 2 357702278 BM_InlinedVectorFillString/1 23 23 30020181 40.8M items/s BM_InlinedVectorFillString/8 177 177 3938795 43.1M items/s BM_InlinedVectorFillString/64 2515 2516 278714 24.3M items/s BM_InlinedVectorFillString/512 21497 21478 31994 22.7M items/s BM_InlinedVectorFillString/1k 43361 43312 16184 22.5M items/s To: Benchmark Time(ns) CPU(ns) Iterations --------------------------------------------------------------- BM_InlinedVectorFillString/0 2 2 357070962 BM_InlinedVectorFillString/1 23 23 31134543 42.0M items/s BM_InlinedVectorFillString/8 166 166 4246010 45.9M items/s BM_InlinedVectorFillString/64 1645 1644 427302 37.1M items/s BM_InlinedVectorFillString/512 13830 13806 49712 35.4M items/s BM_InlinedVectorFillString/1k 28164 28114 24588 34.7M items/s Change: 130922812
1 parent 860334f commit 4e37b20

File tree

2 files changed

+116
-32
lines changed

2 files changed

+116
-32
lines changed

tensorflow/core/lib/gtl/inlined_vector.h

Lines changed: 47 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -187,21 +187,29 @@ class InlinedVector {
187187
return at(0);
188188
}
189189

190-
// Append t to the vector.
190+
// Append a T constructed with args to the vector.
191191
// Increases size() by one.
192192
// Amortized complexity: O(1)
193193
// Worst-case complexity: O(size())
194-
inline void push_back(const value_type& t) {
194+
template <typename... Args>
195+
void emplace_back(Args&&... args) {
195196
size_t s = size();
196197
DCHECK_LE(s, capacity());
197198
if (s < capacity()) {
198-
new (data() + s) T(t);
199+
new (data() + s) T(std::forward<Args>(args)...);
199200
set_size_internal(s + 1);
200201
} else {
201-
PushBackSlow(t);
202+
EmplaceBackSlow(std::forward<Args>(args)...);
202203
}
203204
}
204205

206+
// Append t to the vector.
207+
// Increases size() by one.
208+
// Amortized complexity: O(1)
209+
// Worst-case complexity: O(size())
210+
void push_back(const value_type& t) { emplace_back(t); }
211+
void push_back(value_type&& t) { emplace_back(std::move(t)); }
212+
205213
inline void pop_back() {
206214
DCHECK(!empty());
207215
const size_t s = size();
@@ -348,16 +356,26 @@ class InlinedVector {
348356
}
349357
}
350358

351-
void PushBackSlow(const T& t) {
359+
template <typename... Args>
360+
void EmplaceBackSlow(Args&&... args) {
352361
const size_t s = size();
353362
DCHECK_EQ(s, capacity());
354-
Grow<Move, Fill>(s + 1, &t);
363+
Grow<Move, Construct>(s + 1, std::forward<Args>(args)...);
355364
set_size_internal(s + 1);
356365
}
357366

367+
// Movers for Grow
358368
// Does nothing.
359-
static void Nop(const T* src, size_t n, T* dst) {}
369+
static void Nop(T* src, size_t n, T* dst) {}
370+
371+
// Moves srcs[0,n-1] contents to dst[0,n-1].
372+
static void Move(T* src, size_t n, T* dst) {
373+
for (size_t i = 0; i < n; i++) {
374+
new (dst + i) T(std::move(*(src + i)));
375+
}
376+
}
360377

378+
// Initializers for Resize.
361379
// Initializes dst[0,n-1] with empty constructor.
362380
static void ValueInit(const T*, size_t n, T* dst) {
363381
for (size_t i = 0; i < n; i++) {
@@ -372,13 +390,6 @@ class InlinedVector {
372390
}
373391
}
374392

375-
// Moves srcs[0,n-1] contents to dst[0,n-1].
376-
static void Move(const T* src, size_t n, T* dst) {
377-
for (size_t i = 0; i < n; i++) {
378-
new (dst + i) T(std::move(*(src + i)));
379-
}
380-
}
381-
382393
void Destroy(T* src, int n) {
383394
if (!std::is_trivially_destructible<T>::value) {
384395
for (int i = 0; i < n; i++) {
@@ -387,14 +398,28 @@ class InlinedVector {
387398
}
388399
}
389400

401+
// Initialization methods for Grow.
402+
// 1) Leave uninitialized memory.
403+
struct Uninitialized {
404+
void operator()(T*) const {}
405+
};
406+
// 2) Construct a T with args at not-yet-initialized memory pointed by dst.
407+
struct Construct {
408+
template<class... Args>
409+
void operator()(T* dst, Args&&... args) const {
410+
new (dst) T(std::forward<Args>(args)...);
411+
}
412+
};
413+
390414
// Grow so that capacity >= n. Uses Mover to move existing elements
391-
// to new buffer. If elem is not-null, stores it at slot numbered
392-
// size() before destroying the old buffer by calling Initializer.
393-
// We pass the Initializer and Mover as template arguments so that
394-
// this code compiles even if T does not support copying.
395-
template <void(Mover)(const T*, size_t, T*),
396-
void(Initializer)(const T*, size_t, T*) = Nop>
397-
void Grow(size_t n, const T* elem = nullptr) {
415+
// to new buffer, and possibly initialize the new element according
416+
// to InitType.
417+
// We pass the InitType and Mover as template arguments so that
418+
// this code compiles even if T does not support copying or default
419+
// construction.
420+
template <void(Mover)(T*, size_t, T*), class InitType = Uninitialized,
421+
class... Args>
422+
void Grow(size_t n, Args&&... args) {
398423
size_t s = size();
399424
DCHECK_LE(s, capacity());
400425

@@ -411,10 +436,7 @@ class InlinedVector {
411436
T* dst = static_cast<T*>(malloc(target * sizeof(T)));
412437

413438
// Need to copy elem before discarding src since it might alias src.
414-
if (elem) {
415-
Initializer(elem, 1, dst + s);
416-
}
417-
439+
InitType{}(dst + s, std::forward<Args>(args)...);
418440
Mover(src, s, dst);
419441
DiscardStorage();
420442

tensorflow/core/lib/gtl/inlined_vector_test.cc

Lines changed: 69 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -236,15 +236,20 @@ TEST(RefCountedVec, EraseBeginEnd) {
236236
}
237237

238238
struct NoDefaultCtor {
239-
explicit NoDefaultCtor(int /* x */) {}
239+
explicit NoDefaultCtor(int) {}
240240
};
241241
struct NoCopy {
242242
NoCopy() {}
243-
NoCopy(const NoCopy& /* x */) = delete;
243+
NoCopy(const NoCopy&) = delete;
244244
};
245245
struct NoAssign {
246246
NoAssign() {}
247-
NoAssign& operator=(const NoAssign& /* x */) = delete;
247+
NoAssign& operator=(const NoAssign&) = delete;
248+
};
249+
struct MoveOnly {
250+
MoveOnly() {}
251+
MoveOnly(MoveOnly&&) = default;
252+
MoveOnly& operator=(MoveOnly&&) = default;
248253
};
249254
TEST(InlinedVectorTest, NoDefaultCtor) {
250255
tensorflow::gtl::InlinedVector<NoDefaultCtor, 1> v(10, NoDefaultCtor(2));
@@ -258,6 +263,12 @@ TEST(InlinedVectorTest, NoAssign) {
258263
tensorflow::gtl::InlinedVector<NoAssign, 1> v(10);
259264
(void)v;
260265
}
266+
TEST(InlinedVectorTest, MoveOnly) {
267+
gtl::InlinedVector<MoveOnly, 2> v;
268+
v.push_back(MoveOnly{});
269+
v.push_back(MoveOnly{});
270+
v.push_back(MoveOnly{});
271+
}
261272

262273
TEST(IntVec, Insert) {
263274
for (int len = 0; len < 20; len++) {
@@ -432,7 +443,7 @@ static std::vector<typename T::value_type> Vec(const T& src) {
432443
TEST(IntVec, SelfRefPushBack) {
433444
std::vector<string> std_v;
434445
tensorflow::gtl::InlinedVector<string, 4> v;
435-
const string s = "A very long string to ensure heap.";
446+
const string s = "A quite long string to ensure heap.";
436447
std_v.push_back(s);
437448
v.push_back(s);
438449
for (int i = 0; i < 20; ++i) {
@@ -444,6 +455,21 @@ TEST(IntVec, SelfRefPushBack) {
444455
EXPECT_EQ(std_v, Vec(v));
445456
}
446457

458+
TEST(IntVec, SelfRefPushBackWithMove) {
459+
std::vector<string> std_v;
460+
gtl::InlinedVector<string, 4> v;
461+
const string s = "A quite long string to ensure heap.";
462+
std_v.push_back(s);
463+
v.push_back(s);
464+
for (int i = 0; i < 20; ++i) {
465+
EXPECT_EQ(v.back(), std_v.back());
466+
467+
v.push_back(std::move(v.back()));
468+
std_v.push_back(std::move(std_v.back()));
469+
}
470+
EXPECT_EQ(v.back(), std_v.back());
471+
}
472+
447473
TEST(IntVec, Swap) {
448474
for (int l1 = 0; l1 < 20; l1++) {
449475
SCOPED_TRACE(l1);
@@ -733,7 +759,7 @@ static void BM_InlinedVectorFill(int iters, int len) {
733759
v.push_back(j);
734760
}
735761
}
736-
testing::BytesProcessed((static_cast<int64>(iters) * len) * sizeof(int));
762+
testing::BytesProcessed((int64{iters} * len) * sizeof(int));
737763
}
738764
BENCHMARK(BM_InlinedVectorFill)->Range(0, 1024);
739765

@@ -745,7 +771,7 @@ static void BM_InlinedVectorFillRange(int iters, int len) {
745771
for (int i = 0; i < iters; i++) {
746772
IntVec TF_ATTRIBUTE_UNUSED v(ia.get(), ia.get() + len);
747773
}
748-
testing::BytesProcessed((static_cast<int64>(iters) * len) * sizeof(int));
774+
testing::BytesProcessed((int64{iters} * len) * sizeof(int));
749775
}
750776
BENCHMARK(BM_InlinedVectorFillRange)->Range(0, 1024);
751777

@@ -756,10 +782,46 @@ static void BM_StdVectorFill(int iters, int len) {
756782
v.push_back(j);
757783
}
758784
}
759-
testing::BytesProcessed((static_cast<int64>(iters) * len) * sizeof(int));
785+
testing::BytesProcessed((int64{iters} * len) * sizeof(int));
760786
}
761787
BENCHMARK(BM_StdVectorFill)->Range(0, 1024);
762788

789+
bool StringRepresentedInline(string s) {
790+
const char* chars = s.data();
791+
string s1 = std::move(s);
792+
return s1.data() != chars;
793+
}
794+
795+
static void BM_InlinedVectorFillString(int iters, int len) {
796+
string strings[4] = {"a quite long string", "another long string",
797+
"012345678901234567", "to cause allocation"};
798+
for (int i = 0; i < iters; i++) {
799+
gtl::InlinedVector<string, 8> v;
800+
for (int j = 0; j < len; j++) {
801+
v.push_back(strings[j & 3]);
802+
}
803+
}
804+
testing::ItemsProcessed(int64{iters} * len);
805+
}
806+
BENCHMARK(BM_InlinedVectorFillString)->Range(0, 1024);
807+
808+
static void BM_StdVectorFillString(int iters, int len) {
809+
string strings[4] = {"a quite long string", "another long string",
810+
"012345678901234567", "to cause allocation"};
811+
for (int i = 0; i < iters; i++) {
812+
std::vector<string> v;
813+
for (int j = 0; j < len; j++) {
814+
v.push_back(strings[j & 3]);
815+
}
816+
}
817+
testing::ItemsProcessed(int64{iters} * len);
818+
// The purpose of the benchmark is to verify that inlined vector is
819+
// efficient when moving is more efficent than copying. To do so, we
820+
// use strings that are larger than the small string optimization.
821+
CHECK(!StringRepresentedInline(strings[0]));
822+
}
823+
BENCHMARK(BM_StdVectorFillString)->Range(0, 1024);
824+
763825
namespace {
764826
struct Buffer { // some arbitrary structure for benchmarking.
765827
char* base;

0 commit comments

Comments
 (0)