
Commit ad50daf

Committed by: Vijay Vasudevan
TensorFlow: merge commits from internal
Merge of 2 parents: cdf0dbf + f9d3e9d


70 files changed: +2784 / -1274 lines

tensorflow/core/common_runtime/executor.cc

Lines changed: 15 additions & 10 deletions
@@ -494,7 +494,7 @@ class ExecutorState {
   int max_parallel_iterations = 1;

   // The iteration states of this frame.
-  std::vector<IterationState*> iterations;
+  gtl::InlinedVector<IterationState*, 12> iterations;

   // The NextIteration nodes to enter a new iteration. If the number of
   // outstanding iterations reaches the limit, we will defer the start of

@@ -672,6 +672,16 @@ class ExecutorState {

   // One thread of control finishes.
   void Finish();
+
+  // A standalone routine for this expression so that we can express
+  // that we don't want thread safety analysis on this reference (it's
+  // safe to do without the lock because the iterations array never
+  // resizes and this particular iteration's array element will not
+  // be changed out from under us because the iteration is still alive).
+  std::vector<Entry>* GetInputTensors(FrameState* input_frame, int64 input_iter)
+      const NO_THREAD_SAFETY_ANALYSIS {
+    return input_frame->GetIteration(input_iter)->input_tensors;
+  }
 };

 ExecutorState::ExecutorState(const Executor::Args& args, ExecutorImpl* impl)

@@ -891,13 +901,8 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) {

   VLOG(1) << "Process node: " << id << " " << SummarizeNodeDef(node->def());

-  std::vector<Entry>* input_tensors;
-  {
-    // Need the lock because the iterations vector could be resized by
-    // another thread.
-    mutex_lock l(mu_);
-    input_tensors = input_frame->GetIteration(input_iter)->input_tensors;
-  }
+  std::vector<Entry>* input_tensors =
+      GetInputTensors(input_frame, input_iter);
   Entry* first_input = input_tensors->data() + item.input_start;
   outputs.clear();
   outputs.resize(node->num_outputs());

@@ -1081,9 +1086,9 @@ Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx,

   for (int i = 0; i < node->num_outputs(); ++i) {
     TensorValue val = ctx->release_output(i);
-    // Only Switch and Recv can generate new dead outputs.
     if (*ctx->is_output_dead() || val.tensor == nullptr) {
-      DCHECK(IsSwitch(node) || IsRecv(node));
+      DCHECK(IsSwitch(node) || IsRecv(node))
+          << "Only Switch and Recv can generate new dead outputs.";
     } else {
       Entry* out = &((*outputs)[i]);
out->has_value = true;
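
Note on the GetInputTensors change above: rather than taking mu_ on every node dispatch, it opts a single accessor out of Clang's thread-safety analysis, which is sound here because the iterations array no longer resizes. A minimal, self-contained sketch of the same pattern (illustrative types, not TensorFlow code), assuming Clang's no_thread_safety_analysis attribute:

#include <cstddef>
#include <mutex>
#include <vector>

#if defined(__clang__)
#define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis))
#else
#define NO_THREAD_SAFETY_ANALYSIS
#endif

// A table whose slot array is sized once and never resized, so the
// address of each element is stable for the table's lifetime.
class SlotTable {
 public:
  explicit SlotTable(size_t n) : slots_(n, nullptr) {}

  // Writers still take the lock before installing a slot.
  void Install(size_t i, int* value) {
    std::lock_guard<std::mutex> l(mu_);
    slots_[i] = value;
  }

  // Mirrors GetInputTensors(): reading without mu_ is defensible only
  // because slots_ never resizes and the caller guarantees slot i is not
  // being replaced while it is read; the attribute silences the analysis.
  int* Get(size_t i) const NO_THREAD_SAFETY_ANALYSIS { return slots_[i]; }

 private:
  std::mutex mu_;
  std::vector<int*> slots_;
};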

tensorflow/core/common_runtime/gpu/gpu_device.cc

Lines changed: 11 additions & 11 deletions
@@ -279,9 +279,8 @@ void BaseGPUDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
     context->SetStatus(errors::Internal(
         "Invalid synchronous 'Compute' on GPU for '_Recv' op"));
   } else {
-    const string label =
-        strings::StrCat(op_kernel->name(), ":", op_kernel->type_string());
-    port::Tracing::ScopedAnnotation annotation(label);
+    port::Tracing::ScopedAnnotation annotation(op_kernel->name(),
+                                               op_kernel->type_string());

     const auto num_streams = streams_.size();
     if (num_streams > 1) {

@@ -320,18 +319,19 @@ void BaseGPUDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
     // Keep a copy of the inputs before Compute runs, in case they get
     // deleted. TODO(misard) this will be fixed when the tracking is
     // done right.
-    std::vector<Tensor>* tensor_refs = nullptr;
+    EventMgr::TensorReferenceVector* tensor_refs = nullptr;
     if (!FLAGS_brain_gpu_sync_every_op) {
-      tensor_refs = new std::vector<Tensor>;
-      tensor_refs->reserve(context->num_inputs() + context->num_outputs());
-      for (int ii = 0; ii < context->num_inputs(); ++ii) {
+      const int N_inputs = context->num_inputs();
+      tensor_refs = new EventMgr::TensorReferenceVector;
+      tensor_refs->reserve(N_inputs + context->num_outputs());
+      for (int ii = 0; ii < N_inputs; ++ii) {
        if (context->has_input(ii)) {
          if (IsRefType(context->input_dtype(ii))) {
            Tensor in = context->mutable_input(ii, false);
-            tensor_refs->push_back(in);
+            tensor_refs->push_back(TensorReference(in));
          } else {
            const Tensor& in = context->input(ii);
-            tensor_refs->push_back(in);
+            tensor_refs->push_back(TensorReference(in));
          }
        }
      }

@@ -353,12 +353,12 @@ void BaseGPUDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
     for (int ii = 0; ii < context->num_temps(); ++ii) {
       Tensor* temp = context->temp(ii);
       VLOG(2) << "Saving ref to temp Tensor @ " << DMAHelper::base(temp);
-      tensor_refs->push_back(*temp);
+      tensor_refs->push_back(TensorReference(*temp));
     }
     for (int ii = 0; ii < context->num_outputs(); ++ii) {
       Tensor* temp = context->mutable_output(ii);
       if (nullptr != temp) {
-        tensor_refs->push_back(*temp);
+        tensor_refs->push_back(TensorReference(*temp));
       }
     }
em_->ThenDeleteTensors(stream, tensor_refs);
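
Aside on the annotation change at the top of this file: it swaps an eagerly built strings::StrCat label for a two-argument ScopedAnnotation. The likely intent (this diff does not show the ScopedAnnotation implementation, so the body below is an assumption) is to defer concatenation until tracing is actually enabled:

#include <string>

// Hypothetical stand-in for the real tracing switch, which this diff
// does not show.
inline bool TracingEnabled() { return false; }

class ScopedAnnotationSketch {
 public:
  // Accepting the two label parts separately lets the common, untraced
  // case return without ever building the "name:type" string.
  ScopedAnnotationSketch(const std::string& name, const std::string& type) {
    if (TracingEnabled()) {
      label_ = name + ":" + type;  // paid only when a tracer will read it
      // ... emit a begin-annotation event carrying label_ ...
    }
  }

 private:
  std::string label_;
};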

tensorflow/core/common_runtime/gpu/gpu_event_mgr.h

Lines changed: 12 additions & 4 deletions
@@ -19,6 +19,7 @@ limitations under the License.
 #include <deque>
 #include <vector>
 #include "tensorflow/stream_executor/stream.h"
+#include "tensorflow/core/framework/tensor_reference.h"
 #include "tensorflow/core/lib/core/notification.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"

@@ -45,10 +46,12 @@ class EventMgr {

   ~EventMgr();

+  typedef gtl::InlinedVector<TensorReference, 4> TensorReferenceVector;
+
   // Takes ownership of *tensors and deletes it as soon as all events
   // currently enqueued on *stream have completed.
   inline void ThenDeleteTensors(perftools::gputools::Stream* stream,
-                                std::vector<Tensor>* tensors) {
+                                TensorReferenceVector* tensors) {
     ToFreeVector to_free;
     {
       mutex_lock l(mu_);

@@ -94,7 +97,7 @@ class EventMgr {

   struct InUse {
     perftools::gputools::Event* event;
-    std::vector<Tensor>* mem;
+    TensorReferenceVector* mem;
     BufRec bufrec;
     std::function<void()> func;
   };

@@ -103,7 +106,12 @@ class EventMgr {

   void FreeMemory(const ToFreeVector& to_free) {
     for (const auto& iu : to_free) {
-      delete iu.mem;
+      if (iu.mem != nullptr) {
+        for (auto& t : *(iu.mem)) {
+          t.Unref();
+        }
+        delete iu.mem;
+      }
       if (iu.bufrec.buf) iu.bufrec.alloc->DeallocateRaw(iu.bufrec.buf);
       // The function must be called in another thread.
       if (iu.func != nullptr) threadpool_.Schedule(iu.func);

@@ -118,7 +126,7 @@ class EventMgr {
       EXCLUSIVE_LOCKS_REQUIRED(mu_);

   void QueueTensors(perftools::gputools::Stream* stream,
-                    std::vector<Tensor>* tensors)
+                    TensorReferenceVector* tensors)
       EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     QueueInUse(stream, {nullptr, tensors, BufRec(), nullptr});
   }
}
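
Restating the ownership contract above with simplified stand-in types (not the real EventMgr): the caller allocates the vector and hands it off; once the queued stream events complete, the manager drops each tensor reference and only then frees the container, which is what the new FreeMemory loop does.

#include <vector>

struct Ref {             // stand-in for TensorReference
  void Unref() const {}  // would release one reference on a TensorBuffer
};
typedef std::vector<Ref> RefVector;  // stand-in for TensorReferenceVector

// Runs after the associated GPU events have completed.
void FreeTensorRefs(RefVector* refs) {
  if (refs == nullptr) return;  // InUse records without tensors are legal
  for (const Ref& r : *refs) r.Unref();  // release the buffers first
  delete refs;                           // then the container itself
}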

tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc

Lines changed: 10 additions & 10 deletions
@@ -41,7 +41,7 @@ class TEST_EventMgrHelper {
   }

   void QueueTensors(perftools::gputools::Stream* stream,
-                    std::vector<Tensor>* tensors) {
+                    EventMgr::TensorReferenceVector* tensors) {
     mutex_lock l(em_->mu_);
     em_->QueueTensors(stream, tensors);
   }

@@ -77,12 +77,12 @@ TEST(EventMgr, DelayedPolling) {
   EventMgr em(stream_exec);
   TEST_EventMgrHelper th(&em);
   EXPECT_EQ(0, th.queue_size());
-  std::vector<Tensor>* v = nullptr;
+  EventMgr::TensorReferenceVector* v = nullptr;
   std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
   CHECK(stream.get());
   stream->Init();
   for (int i = 0; i < 5; ++i) {
-    v = new std::vector<Tensor>;
+    v = new EventMgr::TensorReferenceVector;
     th.QueueTensors(stream.get(), v);
     EXPECT_EQ(i + 1, th.queue_size());
     EXPECT_EQ(0, th.free_size());

@@ -92,7 +92,7 @@ TEST(EventMgr, DelayedPolling) {
   EXPECT_EQ(5, th.free_size());
   for (int j = 0; j < 2; ++j) {
     for (int i = 0; i < 5; ++i) {
-      v = new std::vector<Tensor>;
+      v = new EventMgr::TensorReferenceVector;
       th.QueueTensors(stream.get(), v);
       EXPECT_EQ(i + 1, th.queue_size());
       EXPECT_EQ(4 - i, th.free_size());

@@ -110,12 +110,12 @@ TEST(EventMgr, ImmediatePolling) {
   TEST_EventMgrHelper th(&em);
   EXPECT_EQ(0, th.queue_size());
   EXPECT_EQ(0, th.free_size());
-  std::vector<Tensor>* v = nullptr;
+  EventMgr::TensorReferenceVector* v = nullptr;
   std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
   CHECK(stream.get());
   stream->Init();
   for (int i = 0; i < 5; ++i) {
-    v = new std::vector<Tensor>;
+    v = new EventMgr::TensorReferenceVector;
     em.ThenDeleteTensors(stream.get(), v);
     EXPECT_EQ(0, th.queue_size());
     EXPECT_EQ(1, th.free_size());

@@ -130,12 +130,12 @@ TEST(EventMgr, LongDelayedPolling) {
   TEST_EventMgrHelper th(&em);
   EXPECT_EQ(0, th.queue_size());
   EXPECT_EQ(0, th.free_size());
-  std::vector<Tensor>* v = nullptr;
+  EventMgr::TensorReferenceVector* v = nullptr;
   std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
   CHECK(stream.get());
   stream->Init();
   for (int i = 0; i < 5; ++i) {
-    v = new std::vector<Tensor>;
+    v = new EventMgr::TensorReferenceVector;
     th.QueueTensors(stream.get(), v);
     EXPECT_EQ(1 + i, th.queue_size());
     EXPECT_EQ(0, th.free_size());

@@ -153,12 +153,12 @@ TEST(EventMgr, NonEmptyShutdown) {
   TEST_EventMgrHelper th(&em);
   EXPECT_EQ(0, th.queue_size());
   EXPECT_EQ(0, th.free_size());
-  std::vector<Tensor>* v = nullptr;
+  EventMgr::TensorReferenceVector* v = nullptr;
   std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
   CHECK(stream.get());
   stream->Init();
   for (int i = 0; i < 5; ++i) {
-    v = new std::vector<Tensor>;
+    v = new EventMgr::TensorReferenceVector;
     th.QueueTensors(stream.get(), v);
     EXPECT_EQ(1 + i, th.queue_size());
     EXPECT_EQ(0, th.free_size());

tensorflow/core/common_runtime/gpu/gpu_util.cc

Lines changed: 7 additions & 6 deletions
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
 #include "tensorflow/core/common_runtime/gpu/process_state.h"
 #include "tensorflow/core/common_runtime/gpu_device_context.h"
+#include "tensorflow/core/framework/tensor_reference.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/refcount.h"

@@ -91,7 +92,7 @@ void GPUUtil::SetProtoFromGPU(const Tensor& tensor, Device* dev,
   DeviceMemoryBase gpu_src_ptr(const_cast<char*>(src_ptr), num_bytes);
   stream->ThenMemcpy(mb, gpu_src_ptr, num_bytes);
   // Use of tensor may outlive stack scope, so keep a ref.
-  Tensor* tensor_ref = new Tensor(tensor);
+  TensorReference tensor_ref(tensor);
   dev->tensorflow_gpu_device_info()->event_mgr->ThenExecute(
       stream, [stream, done, proto, mb, num_bytes, alloc, tensor_ref]() {
         if (!stream->ok()) {

@@ -104,7 +105,7 @@ void GPUUtil::SetProtoFromGPU(const Tensor& tensor, Device* dev,
           LOG(FATAL) << "SetProtoFromGPU: GPU Memcpy failed";
           return;
         }
-        delete tensor_ref;
+        tensor_ref.Unref();
         port::CopyFromArray(proto->mutable_tensor_content(), mb, num_bytes);
         alloc->Deallocate<char>(mb);
         done(Status::OK());

@@ -169,10 +170,10 @@ void GPUUtil::CopyViaDMA(const string& edge_name,
                    total_bytes);
   if (dst_device_type == DeviceType(DEVICE_GPU).type()) {
     // Use of input may outlive stack scope, so keep a ref.
-    Tensor* input_ref = new Tensor(*input);
+    TensorReference input_ref(*input);
     src_dev_info->event_mgr->ThenExecute(
         stream, [done, stream, input_ref]() {
-          delete input_ref;
+          input_ref.Unref();
          if (!stream->ok()) {
            done(errors::Internal("GPU->GPU Memcpy failed"));
          } else {

@@ -262,9 +263,9 @@ void GPUUtil::CopyCPUTensorToGPU(const Tensor* cpu_tensor,
   stream->ThenMemcpy(&gpu_dst_ptr, src_ptr, total_bytes);
   auto* dev_info = gpu_device->tensorflow_gpu_device_info();
   // Use of cpu_tensor may outlive stack scope, so keep a ref.
-  Tensor* input_ref = new Tensor(*cpu_tensor);
+  TensorReference input_ref(*cpu_tensor);
   dev_info->event_mgr->ThenExecute(stream, [stream, done, input_ref]() {
-    delete input_ref;
+    input_ref.Unref();
     if (!stream->ok()) {
       done(errors::Internal("CopyCPUTensorToGPU: GPU Memcpy failed"));
} else {
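
All of the call sites in this file follow one pattern: previously a heap-allocated Tensor copy kept the buffer alive until the stream callback ran (then delete); now a cheap TensorReference is captured by value in the lambda and Unref'd there. A self-contained sketch of that keep-alive pattern, with a plain thread standing in for event_mgr->ThenExecute (stand-in types, not TensorFlow code):

#include <functional>
#include <thread>

struct Buffer {  // stand-in for a refcounted TensorBuffer
  void Ref() {}
  void Unref() {}
};

struct Handle {  // stand-in for TensorReference: pointer-sized, copyable
  explicit Handle(Buffer* b) : buf_(b) { buf_->Ref(); }
  void Unref() const { buf_->Unref(); }
  Buffer* buf_;
};

// Stand-in for event_mgr->ThenExecute(stream, fn): runs fn asynchronously.
void RunWhenStreamDone(std::function<void()> fn) {
  std::thread(std::move(fn)).detach();
}

void EnqueueAsyncCopy(Buffer* buf) {
  Handle keep_alive(buf);    // take a reference before the async hand-off
  RunWhenStreamDone([keep_alive]() {
    // ... consume the buffer's contents here ...
    keep_alive.Unref();      // release exactly once, in the callback
  });
}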
tensorflow/core/framework/tensor_reference.h

Lines changed: 47 additions & 0 deletions (new file)

/* Copyright 2015 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_FRAMEWORK_TENSOR_REFERENCE_H_
#define TENSORFLOW_FRAMEWORK_TENSOR_REFERENCE_H_

#include "tensorflow/core/public/tensor.h"

namespace tensorflow {

// An opaque class that holds a reference to an underlying TensorBuffer.
// Unlike Tensor, it does not have any shape or type information, so
// it is cheaper to construct/move, but the only thing you can really do
// with it is Unref it, which releases one of the references to the underlying
// TensorBuffer.
// IMPORTANT: If you do not call Unref(), you will likely leak tensor memory.
class TensorReference {
 public:
  explicit TensorReference(const Tensor& tensor) : buf_(tensor.buf_) {
    if (buf_) buf_->Ref();
  }

  ~TensorReference() {}

  void Unref() const {
    if (buf_) buf_->Unref();
  }

 private:
  TensorBuffer* buf_;
};

}  // namespace tensorflow

#endif // TENSORFLOW_FRAMEWORK_TENSOR_REFERENCE_H_
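
A subtlety in the class above: the destructor is a deliberate no-op. TensorReference relies on the default (member-wise) copy constructor, so copies, such as the by-value lambda captures in gpu_util.cc, share the single reference taken at construction, and exactly one explicit Unref() must be issued across all copies. A self-contained analog (hypothetical Buffer type, not TensorFlow's TensorBuffer) that makes the counting visible:

#include <cassert>

struct Buffer {
  int refcount = 1;
  void Ref() { ++refcount; }
  void Unref() {
    if (--refcount == 0) delete this;
  }
};

class BufferReference {
 public:
  explicit BufferReference(Buffer* b) : buf_(b) { buf_->Ref(); }
  ~BufferReference() {}  // intentionally no Unref: copies must be free
  void Unref() const { buf_->Unref(); }

 private:
  Buffer* buf_;  // default copy ctor shares this pointer without Ref()
};

int main() {
  Buffer* b = new Buffer;      // refcount == 1 (owner's reference)
  BufferReference r(b);        // refcount == 2
  {
    BufferReference copy = r;  // plain pointer copy: refcount stays 2
  }                            // no-op destructor: refcount still 2
  assert(b->refcount == 2);
  r.Unref();                   // refcount == 1; one Unref per explicit Ref
  b->Unref();                  // refcount == 0; buffer deleted
}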

tensorflow/core/graph/graph.cc

Lines changed: 35 additions & 1 deletion
@@ -45,7 +45,11 @@ string Node::DebugString() const {
 }

 Node::Node()
-    : id_(-1), cost_id_(-1), props_(nullptr), assigned_device_name_() {}
+    : id_(-1),
+      cost_id_(-1),
+      class_(NC_UNINITIALIZED),
+      props_(nullptr),
+      assigned_device_name_() {}

 Node::~Node() {
   if (props_) {

@@ -65,13 +69,43 @@ void Node::Initialize(int id, int cost_id, Properties* props) {
     props_->Unref();
   }
   props_ = props;
+  // Initialize the class_ based on the type string
+  const string& ts = this->type_string();
+  class_ = NC_UNINITIALIZED;
+
+#define SET_CLASS(enum_val, ts, str1, str2)        \
+  do {                                             \
+    if ((((ts) == (str1)) || ((ts) == (str2)))) {  \
+      /* Cannot be member of more than one class*/ \
+      CHECK(class_ == NC_UNINITIALIZED);           \
+      class_ = (enum_val);                         \
+    }                                              \
+  } while (0)
+
+  SET_CLASS(NC_SWITCH, ts, "Switch", "RefSwitch");
+  SET_CLASS(NC_MERGE, ts, "Merge", "");
+  SET_CLASS(NC_ENTER, ts, "Enter", "RefEnter");
+  SET_CLASS(NC_EXIT, ts, "Exit", "");
+  SET_CLASS(NC_NEXT_ITERATION, ts, "NextIteration", "");
+  SET_CLASS(NC_LOOP_COND, ts, "LoopCond", "");
+  SET_CLASS(NC_CONTROL_TRIGGER, ts, "ControlTrigger", "");
+  SET_CLASS(NC_SEND, ts, "_Send", "_HostSend");
+  SET_CLASS(NC_RECV, ts, "_Recv", "_HostRecv");
+  SET_CLASS(NC_CONSTANT, ts, "Const", "HostConst");
+  SET_CLASS(NC_VARIABLE, ts, "Variable", "");
+  SET_CLASS(NC_IDENTITY, ts, "Identity", "RefIdentity");
+  if (class_ == NC_UNINITIALIZED) {
+    class_ = NC_OTHER;  // Catch all
+  }
+#undef SET_CLASS
 }

 void Node::Clear() {
   in_edges_.clear();
   out_edges_.clear();
   id_ = -1;
   cost_id_ = -1;
+  class_ = NC_UNINITIALIZED;

   if (props_) {
