From 79b01b932b70af1e10c0332efcde067726552536 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 14 Nov 2019 16:30:05 -0600 Subject: [PATCH 1/2] Ensure BufferBlocks are completed and empty in RowShufflingTransformer. If BufferBlock doesn't get completed and drained of its items, it will have non-completed Tasks. When debugging in VS, this will appear to be a memory leak because VS adds all running Tasks to a static Dictionary, and then removes them when the Task is complete. If the Task doesn't get completed, it won't be removed from the Dictionary - thus it looks like a leak. Note there is no leak when the VS Debugger isn't attached because the non-completed Tasks don't get added to the static Dictionary. Fix #4399 --- .../Transforms/RowShufflingTransformer.cs | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs b/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs index c6eb4f9483..9e96db6a6e 100644 --- a/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs @@ -553,11 +553,24 @@ protected override void Dispose(bool disposing) { if (_disposed) return; - if (disposing && _producerTask.Status == TaskStatus.Running) + + if (disposing) { - _toProduce.Post(0); + _toProduce.Complete(); _producerTask.Wait(); + + // Complete the consumer after the producerTask has finished, since producerTask could + // have posted more items to _toConsume. + _toConsume.Complete(); + + // Drain both BufferBlocks - this prevents what appears to be memory leaks when using the VS Debugger + // because if a BufferBlock still contains items, its underlying Tasks are not getting completed. + // See https://github.com/dotnet/corefx/issues/30582 for the VS Debugger issue. + // See also https://github.com/dotnet/machinelearning/issues/4399 + _toProduce.TryReceiveAll(out _); + _toConsume.TryReceiveAll(out _); } + _disposed = true; base.Dispose(disposing); } @@ -578,15 +591,16 @@ private async Task LoopProducerWorker() try { int circularIndex = 0; - for (; ; ) + while (await _toProduce.OutputAvailableAsync()) { - int requested = await _toProduce.ReceiveAsync(); - if (requested == 0) + int requested; + if (!_toProduce.TryReceive(out requested)) { - // We had some sort of early exit. Just go out, do not post even the - // sentinel to the consumer, as nothing will be consumed any more. - return; + // OutputAvailableAsync returned true, but TryReceive returned false - + // so loop back around and try again. + continue; } + Ch.Assert(requested >= _blockSize); int numRows; for (numRows = 0; numRows < requested; ++numRows) From 989831b48a2eeabe152446bc7aafe9a6ccea9cd1 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Fri, 15 Nov 2019 12:35:56 -0600 Subject: [PATCH 2/2] PR feedback --- src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs b/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs index 9e96db6a6e..265783f137 100644 --- a/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs @@ -591,7 +591,7 @@ private async Task LoopProducerWorker() try { int circularIndex = 0; - while (await _toProduce.OutputAvailableAsync()) + while (await _toProduce.OutputAvailableAsync().ConfigureAwait(false)) { int requested; if (!_toProduce.TryReceive(out requested))