Skip to content

Commit fa20931

Browse files
author
Alejandro Abdelnur
committed
YARN-598. Add virtual cores to queue metrics. (sandyr via tucu)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1480816 13f79535-47bb-0310-9956-ffa450edef68
1 parent 1619add commit fa20931

File tree

3 files changed

+77
-43
lines changed
  • hadoop-yarn-project
    • hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src

3 files changed

+77
-43
lines changed

hadoop-yarn-project/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,8 @@ Release 2.0.5-beta - UNRELEASED
238238
YARN-568. Add support for work preserving preemption to the FairScheduler.
239239
(Carlo Curino and Sandy Ryza via cdouglas)
240240

241+
YARN-598. Add virtual cores to queue metrics. (sandyr via tucu)
242+
241243
OPTIMIZATIONS
242244

243245
BUG FIXES

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,17 @@ public class QueueMetrics implements MetricsSource {
6060
@Metric("# of apps failed") MutableGaugeInt appsFailed;
6161

6262
@Metric("Allocated memory in MB") MutableGaugeInt allocatedMB;
63+
@Metric("Allocated CPU in virtual cores") MutableGaugeInt allocatedVCores;
6364
@Metric("# of allocated containers") MutableGaugeInt allocatedContainers;
6465
@Metric("Aggregate # of allocated containers") MutableCounterLong aggregateContainersAllocated;
6566
@Metric("Aggregate # of released containers") MutableCounterLong aggregateContainersReleased;
6667
@Metric("Available memory in MB") MutableGaugeInt availableMB;
68+
@Metric("Available CPU in virtual cores") MutableGaugeInt availableVCores;
6769
@Metric("Pending memory allocation in MB") MutableGaugeInt pendingMB;
70+
@Metric("Pending CPU allocation in virtual cores") MutableGaugeInt pendingVCores;
6871
@Metric("# of pending containers") MutableGaugeInt pendingContainers;
6972
@Metric("# of reserved memory in MB") MutableGaugeInt reservedMB;
73+
@Metric("Reserved CPU in virtual cores") MutableGaugeInt reservedVCores;
7074
@Metric("# of reserved containers") MutableGaugeInt reservedContainers;
7175
@Metric("# of active users") MutableGaugeInt activeUsers;
7276
@Metric("# of active users") MutableGaugeInt activeApplications;
@@ -268,6 +272,7 @@ public void finishApp(AppSchedulingInfo app,
268272
*/
269273
public void setAvailableResourcesToQueue(Resource limit) {
270274
availableMB.set(limit.getMemory());
275+
availableVCores.set(limit.getVirtualCores());
271276
}
272277

273278
/**
@@ -304,6 +309,7 @@ public void incrPendingResources(String user, int containers, Resource res) {
304309
private void _incrPendingResources(int containers, Resource res) {
305310
pendingContainers.incr(containers);
306311
pendingMB.incr(res.getMemory());
312+
pendingVCores.incr(res.getVirtualCores());
307313
}
308314

309315
public void decrPendingResources(String user, int containers, Resource res) {
@@ -320,12 +326,14 @@ public void decrPendingResources(String user, int containers, Resource res) {
320326
private void _decrPendingResources(int containers, Resource res) {
321327
pendingContainers.decr(containers);
322328
pendingMB.decr(res.getMemory());
329+
pendingVCores.decr(res.getVirtualCores());
323330
}
324331

325332
public void allocateResources(String user, int containers, Resource res) {
326333
allocatedContainers.incr(containers);
327334
aggregateContainersAllocated.incr(containers);
328335
allocatedMB.incr(res.getMemory() * containers);
336+
allocatedVCores.incr(res.getVirtualCores() * containers);
329337
_decrPendingResources(containers, Resources.multiply(res, containers));
330338
QueueMetrics userMetrics = getUserMetrics(user);
331339
if (userMetrics != null) {
@@ -340,6 +348,7 @@ public void releaseResources(String user, int containers, Resource res) {
340348
allocatedContainers.decr(containers);
341349
aggregateContainersReleased.incr(containers);
342350
allocatedMB.decr(res.getMemory() * containers);
351+
allocatedVCores.decr(res.getVirtualCores() * containers);
343352
QueueMetrics userMetrics = getUserMetrics(user);
344353
if (userMetrics != null) {
345354
userMetrics.releaseResources(user, containers, res);
@@ -352,6 +361,7 @@ public void releaseResources(String user, int containers, Resource res) {
352361
public void reserveResource(String user, Resource res) {
353362
reservedContainers.incr();
354363
reservedMB.incr(res.getMemory());
364+
reservedVCores.incr(res.getVirtualCores());
355365
QueueMetrics userMetrics = getUserMetrics(user);
356366
if (userMetrics != null) {
357367
userMetrics.reserveResource(user, res);
@@ -364,6 +374,7 @@ public void reserveResource(String user, Resource res) {
364374
public void unreserveResource(String user, Resource res) {
365375
reservedContainers.decr();
366376
reservedMB.decr(res.getMemory());
377+
reservedVCores.decr(res.getVirtualCores());
367378
QueueMetrics userMetrics = getUserMetrics(user);
368379
if (userMetrics != null) {
369380
userMetrics.unreserveResource(user, res);
@@ -434,6 +445,10 @@ public Resource getAllocatedResources() {
434445
public int getAllocatedMB() {
435446
return allocatedMB.value();
436447
}
448+
449+
public int getAllocatedVirtualCores() {
450+
return allocatedVCores.value();
451+
}
437452

438453
public int getAllocatedContainers() {
439454
return allocatedContainers.value();
@@ -442,10 +457,18 @@ public int getAllocatedContainers() {
442457
public int getAvailableMB() {
443458
return availableMB.value();
444459
}
460+
461+
public int getAvailableVirtualCores() {
462+
return availableVCores.value();
463+
}
445464

446465
public int getPendingMB() {
447466
return pendingMB.value();
448467
}
468+
469+
public int getPendingVirtualCores() {
470+
return pendingVCores.value();
471+
}
449472

450473
public int getPendingContainers() {
451474
return pendingContainers.value();
@@ -454,6 +477,10 @@ public int getPendingContainers() {
454477
public int getReservedMB() {
455478
return reservedMB.value();
456479
}
480+
481+
public int getReservedVirtualCores() {
482+
return reservedVCores.value();
483+
}
457484

458485
public int getReservedContainers() {
459486
return reservedContainers.value();

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java

Lines changed: 48 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -66,20 +66,20 @@ public void setUp() {
6666
MetricsSource userSource = userSource(ms, queueName, user);
6767
checkApps(queueSource, 1, 1, 0, 0, 0, 0);
6868

69-
metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB));
70-
metrics.incrPendingResources(user, 5, Resources.createResource(15*GB));
69+
metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100));
70+
metrics.incrPendingResources(user, 5, Resources.createResource(15*GB, 15));
7171
// Available resources is set externally, as it depends on dynamic
7272
// configurable cluster/queue resources
73-
checkResources(queueSource, 0, 0, 0, 0, 100*GB, 15*GB, 5, 0, 0);
73+
checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0);
7474

7575
metrics.incrAppsRunning(app, user);
7676
checkApps(queueSource, 1, 0, 1, 0, 0, 0);
7777

78-
metrics.allocateResources(user, 3, Resources.createResource(2*GB));
79-
checkResources(queueSource, 6*GB, 3, 3, 0, 100*GB, 9*GB, 2, 0, 0);
78+
metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2));
79+
checkResources(queueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0);
8080

81-
metrics.releaseResources(user, 1, Resources.createResource(2*GB));
82-
checkResources(queueSource, 4*GB, 2, 3, 1, 100*GB, 9*GB, 2, 0, 0);
81+
metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2));
82+
checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0);
8383

8484
metrics.finishApp(app, RMAppAttemptState.FINISHED);
8585
checkApps(queueSource, 1, 0, 0, 1, 0, 0);
@@ -148,25 +148,25 @@ public void testQueueAppMetricsForMultipleFailures() {
148148
checkApps(queueSource, 1, 1, 0, 0, 0, 0);
149149
checkApps(userSource, 1, 1, 0, 0, 0, 0);
150150

151-
metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB));
152-
metrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB));
153-
metrics.incrPendingResources(user, 5, Resources.createResource(15*GB));
151+
metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100));
152+
metrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10));
153+
metrics.incrPendingResources(user, 5, Resources.createResource(15*GB, 15));
154154
// Available resources is set externally, as it depends on dynamic
155155
// configurable cluster/queue resources
156-
checkResources(queueSource, 0, 0, 0, 0, 100*GB, 15*GB, 5, 0, 0);
157-
checkResources(userSource, 0, 0, 0, 0, 10*GB, 15*GB, 5, 0, 0);
156+
checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0);
157+
checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0);
158158

159159
metrics.incrAppsRunning(app, user);
160160
checkApps(queueSource, 1, 0, 1, 0, 0, 0);
161161
checkApps(userSource, 1, 0, 1, 0, 0, 0);
162162

163-
metrics.allocateResources(user, 3, Resources.createResource(2*GB));
164-
checkResources(queueSource, 6*GB, 3, 3, 0, 100*GB, 9*GB, 2, 0, 0);
165-
checkResources(userSource, 6*GB, 3, 3, 0, 10*GB, 9*GB, 2, 0, 0);
163+
metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2));
164+
checkResources(queueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0);
165+
checkResources(userSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0);
166166

167-
metrics.releaseResources(user, 1, Resources.createResource(2*GB));
168-
checkResources(queueSource, 4*GB, 2, 3, 1, 100*GB, 9*GB, 2, 0, 0);
169-
checkResources(userSource, 4*GB, 2, 3, 1, 10*GB, 9*GB, 2, 0, 0);
167+
metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2));
168+
checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0);
169+
checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0);
170170

171171
metrics.finishApp(app, RMAppAttemptState.FINISHED);
172172
checkApps(queueSource, 1, 0, 0, 1, 0, 0);
@@ -197,35 +197,35 @@ public void testQueueAppMetricsForMultipleFailures() {
197197
checkApps(userSource, 1, 1, 0, 0, 0, 0);
198198
checkApps(parentUserSource, 1, 1, 0, 0, 0, 0);
199199

200-
parentMetrics.setAvailableResourcesToQueue(Resources.createResource(100*GB));
201-
metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB));
202-
parentMetrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB));
203-
metrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB));
204-
metrics.incrPendingResources(user, 5, Resources.createResource(15*GB));
205-
checkResources(queueSource, 0, 0, 0, 0, 100*GB, 15*GB, 5, 0, 0);
206-
checkResources(parentQueueSource, 0, 0, 0, 0, 100*GB, 15*GB, 5, 0, 0);
207-
checkResources(userSource, 0, 0, 0, 0, 10*GB, 15*GB, 5, 0, 0);
208-
checkResources(parentUserSource, 0, 0, 0, 0, 10*GB, 15*GB, 5, 0, 0);
200+
parentMetrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100));
201+
metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100));
202+
parentMetrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10));
203+
metrics.setAvailableResourcesToUser(user, Resources.createResource(10*GB, 10));
204+
metrics.incrPendingResources(user, 5, Resources.createResource(15*GB, 15));
205+
checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0);
206+
checkResources(parentQueueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0);
207+
checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0);
208+
checkResources(parentUserSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0);
209209

210210
metrics.incrAppsRunning(app, user);
211211
checkApps(queueSource, 1, 0, 1, 0, 0, 0);
212212
checkApps(userSource, 1, 0, 1, 0, 0, 0);
213213

214-
metrics.allocateResources(user, 3, Resources.createResource(2*GB));
215-
metrics.reserveResource(user, Resources.createResource(3*GB));
214+
metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2));
215+
metrics.reserveResource(user, Resources.createResource(3*GB, 3));
216216
// Available resources is set externally, as it depends on dynamic
217217
// configurable cluster/queue resources
218-
checkResources(queueSource, 6*GB, 3, 3, 0, 100*GB, 9*GB, 2, 3*GB, 1);
219-
checkResources(parentQueueSource, 6*GB, 3, 3, 0, 100*GB, 9*GB, 2, 3*GB, 1);
220-
checkResources(userSource, 6*GB, 3, 3, 0, 10*GB, 9*GB, 2, 3*GB, 1);
221-
checkResources(parentUserSource, 6*GB, 3, 3, 0, 10*GB, 9*GB, 2, 3*GB, 1);
222-
223-
metrics.releaseResources(user, 1, Resources.createResource(2*GB));
224-
metrics.unreserveResource(user, Resources.createResource(3*GB));
225-
checkResources(queueSource, 4*GB, 2, 3, 1, 100*GB, 9*GB, 2, 0, 0);
226-
checkResources(parentQueueSource, 4*GB, 2, 3, 1, 100*GB, 9*GB, 2, 0, 0);
227-
checkResources(userSource, 4*GB, 2, 3, 1, 10*GB, 9*GB, 2, 0, 0);
228-
checkResources(parentUserSource, 4*GB, 2, 3, 1, 10*GB, 9*GB, 2, 0, 0);
218+
checkResources(queueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 3*GB, 3, 1);
219+
checkResources(parentQueueSource, 6*GB, 6, 3, 3, 0, 100*GB, 100, 9*GB, 9, 2, 3*GB, 3, 1);
220+
checkResources(userSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 3*GB, 3, 1);
221+
checkResources(parentUserSource, 6*GB, 6, 3, 3, 0, 10*GB, 10, 9*GB, 9, 2, 3*GB, 3, 1);
222+
223+
metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2));
224+
metrics.unreserveResource(user, Resources.createResource(3*GB, 3));
225+
checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0);
226+
checkResources(parentQueueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0);
227+
checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0);
228+
checkResources(parentUserSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0);
229229

230230
metrics.finishApp(app, RMAppAttemptState.FINISHED);
231231
checkApps(queueSource, 1, 0, 0, 1, 0, 0);
@@ -277,18 +277,23 @@ public static void checkApps(MetricsSource source, int submitted, int pending,
277277
}
278278

279279
public static void checkResources(MetricsSource source, int allocatedMB,
280-
int allocCtnrs, long aggreAllocCtnrs, long aggreReleasedCtnrs,
281-
int availableMB, int pendingMB, int pendingCtnrs,
282-
int reservedMB, int reservedCtnrs) {
280+
int allocatedCores, int allocCtnrs, long aggreAllocCtnrs,
281+
long aggreReleasedCtnrs, int availableMB, int availableCores, int pendingMB,
282+
int pendingCores, int pendingCtnrs, int reservedMB, int reservedCores,
283+
int reservedCtnrs) {
283284
MetricsRecordBuilder rb = getMetrics(source);
284285
assertGauge("AllocatedMB", allocatedMB, rb);
286+
assertGauge("AllocatedVCores", allocatedCores, rb);
285287
assertGauge("AllocatedContainers", allocCtnrs, rb);
286288
assertCounter("AggregateContainersAllocated", aggreAllocCtnrs, rb);
287289
assertCounter("AggregateContainersReleased", aggreReleasedCtnrs, rb);
288290
assertGauge("AvailableMB", availableMB, rb);
291+
assertGauge("AvailableVCores", availableCores, rb);
289292
assertGauge("PendingMB", pendingMB, rb);
293+
assertGauge("PendingVCores", pendingCores, rb);
290294
assertGauge("PendingContainers", pendingCtnrs, rb);
291295
assertGauge("ReservedMB", reservedMB, rb);
296+
assertGauge("ReservedVCores", reservedCores, rb);
292297
assertGauge("ReservedContainers", reservedCtnrs, rb);
293298
}
294299

0 commit comments

Comments
 (0)