Skip to content

Commit c4ec7db

Browse files
committed
SERVER-16773: better tcmalloc cleanup
1 parent 8bb811a commit c4ec7db

File tree

6 files changed

+44
-22
lines changed

6 files changed

+44
-22
lines changed

src/mongo/db/concurrency/lock_state.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -728,7 +728,6 @@ namespace {
728728

729729
// If infinite timeout was requested, just keep waiting
730730
if (timeoutMs == UINT_MAX) {
731-
markThreadIdle();
732731
continue;
733732
}
734733

@@ -739,9 +738,6 @@ namespace {
739738
if (waitTimeMs == 0) {
740739
break;
741740
}
742-
743-
// We have waited for a while and will likely keep waiting even longer, so mark us as idle
744-
markThreadIdle();
745741
}
746742

747743
// Cleanup the state, since this is an unused lock now

src/mongo/util/net/message_server_port.cpp

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,7 @@ namespace {
204204
portWithHandler->psock->setLogLevel(logger::LogSeverity::Debug(1));
205205

206206
Message m;
207-
int64_t lastIdle = Listener::getElapsedTimeMillis();
208-
int64_t avgMillisBetweenIdle = 0;
207+
int64_t counter = 0;
209208
try {
210209
LastError * le = new LastError();
211210
lastError.reset( le ); // lastError now has ownership
@@ -231,20 +230,10 @@ namespace {
231230
networkCounter.hit(portWithHandler->psock->getBytesIn(),
232231
portWithHandler->psock->getBytesOut());
233232

234-
// Connections that don't run at a high rate should mark an idle point
235-
// between operations to allow cleanup of the thread-local malloc cache.
236-
// Just before a receive is a reasonable point, as we may overlap with
237-
// the processing of a command response. Avoid doing this in very active
238-
// threads as they are actively using their memory and not experiencing
239-
// resource starvation. Use the coarse clock with averaging for efficiency.
240-
241-
const int64_t now = Listener::getElapsedTimeMillis();
242-
const int64_t millisSinceIdle = now - lastIdle;
243-
avgMillisBetweenIdle = (7 * avgMillisBetweenIdle + millisSinceIdle) / 8;
244-
if (avgMillisBetweenIdle >= 10) {
233+
// Occasionally we want to see if we're using too much memory.
234+
if ((counter++ & 0xf) == 0) {
245235
markThreadIdle();
246236
}
247-
lastIdle = now;
248237
}
249238
}
250239
catch ( AssertionException& e ) {

src/mongo/util/tcmalloc_server_status_section.cpp

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,16 @@
2525
* then also delete it in the license file.
2626
*/
2727

28+
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault
29+
2830
#include "mongo/platform/basic.h"
2931

3032
#include <third_party/gperftools-2.2/src/gperftools/malloc_extension.h>
3133

3234
#include "mongo/base/init.h"
3335
#include "mongo/db/commands/server_status.h"
3436
#include "mongo/util/concurrency/synchronization.h"
37+
#include "mongo/util/log.h"
3538
#include "mongo/util/net/listen.h"
3639

3740
namespace mongo {
@@ -43,16 +46,35 @@ namespace {
4346
// a long time.
4447
const int kManyClients = 40;
4548

49+
boost::mutex tcmallocCleanupLock;
50+
4651
/**
4752
* Callback to allow TCMalloc to release freed memory to the central list at
4853
* favorable times. Ideally would do some milder cleanup or scavenge...
4954
*/
5055
void threadStateChange() {
51-
if (Listener::globalTicketHolder.used() > kManyClients) {
52-
// Mark thread busy while we're idle, so that overhead is incurred now.
53-
MallocExtension::instance()->MarkThreadIdle();
54-
MallocExtension::instance()->MarkThreadBusy();
56+
if (Listener::globalTicketHolder.used() <= kManyClients) {
57+
return;
5558
}
59+
60+
size_t threadCacheSizeBytes = MallocExtension::instance()->GetThreadCacheSize();
61+
62+
static const size_t kMaxThreadCacheSizeBytes = 0x10000;
63+
if (threadCacheSizeBytes < kMaxThreadCacheSizeBytes) {
64+
// This number was chosen a bit magically.
65+
// At 1000 threads and the current (64mb) thread local cache size, we're "full".
66+
// So we may want this number to scale with the number of current clients.
67+
return;
68+
}
69+
70+
LOG(1) << "thread over memory limit, cleaning up, current: "
71+
<< (threadCacheSizeBytes/1024) << "k";
72+
73+
// We synchronize as the tcmalloc central list uses a spinlock, and we can cause a really
74+
// terrible runaway if we're not careful.
75+
boost::mutex::scoped_lock lk(tcmallocCleanupLock);
76+
MallocExtension::instance()->MarkThreadIdle();
77+
MallocExtension::instance()->MarkThreadBusy();
5678
}
5779

5880
// Register threadStateChange callback

src/third_party/gperftools-2.2/src/gperftools/malloc_extension.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,10 @@ class PERFTOOLS_DLL_DECL MallocExtension {
240240
// Most malloc implementations ignore this routine.
241241
virtual void MarkThreadBusy();
242242

243+
// Gets the size of this thread's cache in bytes.
244+
// MONGODB ADDITION
245+
virtual size_t GetThreadCacheSize();
246+
243247
// Gets the system allocator used by the malloc extension instance. Returns
244248
// NULL for malloc implementations that do not support pluggable system
245249
// allocators.

src/third_party/gperftools-2.2/src/malloc_extension.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ void MallocExtension::MarkThreadBusy() {
151151
// Default implementation does nothing
152152
}
153153

154+
size_t MallocExtension::GetThreadCacheSize() {
155+
return 0;
156+
}
157+
154158
SysAllocator* MallocExtension::GetSystemAllocator() {
155159
return NULL;
156160
}

src/third_party/gperftools-2.2/src/tcmalloc.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,13 @@ class TCMallocImplementation : public MallocExtension {
731731

732732
virtual void MarkThreadBusy(); // Implemented below
733733

734+
virtual size_t GetThreadCacheSize() {
735+
ThreadCache* tc = ThreadCache::GetCacheIfPresent();
736+
if (!tc)
737+
return 0;
738+
return tc->Size();
739+
}
740+
734741
virtual SysAllocator* GetSystemAllocator() {
735742
SpinLockHolder h(Static::pageheap_lock());
736743
return sys_alloc;

0 commit comments

Comments
 (0)