Skip to content

Commit f66a67a

Browse files
committed
SERVER-35793 Add concurrency test for transactions metrics
1 parent 3d7d2b7 commit f66a67a

File tree

2 files changed

+131
-0
lines changed

2 files changed

+131
-0
lines changed
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
'use strict';

/**
 * This test checks high level invariants of various transaction related metrics reported in
 * serverStatus and currentOp.
 *
 * @tags: [uses_transactions]
 */

load('jstests/concurrency/fsm_libs/extend_workload.js');  // for extendWorkload
// for $config
load('jstests/concurrency/fsm_workloads/multi_statement_transaction_atomicity_isolation.js');
13+
14+
var $config = extendWorkload($config, function($config, $super) {
15+
16+
/**
 * Returns all elements in the given array that evaluate to false for the given predicate
 * function 'predFn'.
 *
 * @param {Array} arr - elements to examine; the array itself is not modified.
 * @param {function} predFn - predicate, called with a single element at a time.
 * @returns {Array} a new array holding, in their original order, the elements for which
 *     'predFn' returned a falsy value.
 */
function filterFalse(arr, predFn) {
    return arr.filter((elem) => !predFn(elem));
}
23+
24+
/**
 * serverStatus invariant: currentActive + currentInactive = currentOpen
 *
 * @param {Object} serverStatusTxnStats - one sample of transaction stats; the fields
 *     'currentActive', 'currentInactive' and 'currentOpen' are read from it.
 * @returns {boolean} true if the invariant holds for this sample. A missing or non-numeric
 *     field converts to NaN and therefore yields false.
 */
function activePlusInactiveEqualsOpen(serverStatusTxnStats) {
    // Stats are returned in NumberLong type. Convert to Number type so we are sure comparison
    // works as expected.
    const active = Number(serverStatusTxnStats["currentActive"]);
    const inactive = Number(serverStatusTxnStats["currentInactive"]);
    const open = Number(serverStatusTxnStats["currentOpen"]);
    return (active + inactive) === open;
}
35+
36+
/**
 * serverStatus invariant: totalCommitted + totalAborted + currentOpen = totalStarted
 *
 * @param {Object} serverStatusTxnStats - one sample of transaction stats; the fields
 *     'totalCommitted', 'totalAborted', 'currentOpen' and 'totalStarted' are read from it.
 * @returns {boolean} true if the invariant holds for this sample. A missing or non-numeric
 *     field converts to NaN and therefore yields false.
 */
function committedPlusAbortedPlusOpenEqualsStarted(serverStatusTxnStats) {
    // Convert every counter to Number first so that the strict comparison below compares
    // plain numeric values.
    const committed = Number(serverStatusTxnStats["totalCommitted"]);
    const aborted = Number(serverStatusTxnStats["totalAborted"]);
    const open = Number(serverStatusTxnStats["currentOpen"]);
    const started = Number(serverStatusTxnStats["totalStarted"]);
    return (committed + aborted + open) === started;
}
46+
47+
/**
 * Check invariants of transactions metrics reported in 'serverStatus' (server-wide metrics),
 * using the number of given samples.
 *
 * Inside the server, these metrics are tracked individually with atomic counters, but there
 * is no guarantee that two separate counters are updated atomically. There may be a delay
 * between the update of one counter (e.g. 'currentOpen') and another counter (e.g.
 * 'totalAborted'). This means that some invariants may not strictly hold at all times. The
 * assumption is that when these invariants are broken due to these non atomic updates, they
 * are broken for an extremely short period of time, and therefore only appear very rarely
 * when sampling the output of these metrics. We base the testing strategy below on this
 * assumption. Instead of asserting that a particular invariant holds 100% of the time, we
 * assert something slightly weaker i.e. that the invariant holds, for example, 95% of the
 * time. The error bounds for this test were determined somewhat empirically, but they were
 * kept very conservative. One goal of these tests is to ensure that if a change was made that
 * broke these metrics significantly, it would be picked up by these tests. This test should
 * not be sensitive to small fluctuations in metrics output.
 *
 * @param {Object} db - database handle; 'db.adminCommand({serverStatus: 1})' is run on it once
 *     per sample and the 'transactions' section of each response is kept.
 * @param {number} nSamples - how many serverStatus samples to take.
 */
function checkServerStatusInvariants(db, nSamples) {
    // Sample serverStatus several times, sleeping a bit in between.
    const samples = [];
    for (let i = 0; i < nSamples; ++i) {
        const res = db.adminCommand({serverStatus: 1});
        // Fail with a clear error if the command itself failed, rather than with a TypeError
        // later on when the missing 'transactions' section is inspected.
        assertAlways.commandWorked(res);
        samples.push(res.transactions);
        // Only pause between consecutive samples; there is nothing to wait for after the last.
        if (i + 1 < nSamples) {
            sleep(50);  // milliseconds.
        }
    }

    // We consider an invariant failure rate of 5% within a large enough sample to be acceptable.
    // For example, in a batch of 100 metrics samples, we would accept <= 5 violations of a
    // particular invariant.
    const maxErrPct = 0.05;

    // Every invariant is checked against the same set of samples, in the order listed here.
    const invariants = [
        {name: "activePlusInactiveEqualsOpen", holds: activePlusInactiveEqualsOpen},
        {
          name: "committedPlusAbortedPlusOpenEqualsStarted",
          holds: committedPlusAbortedPlusOpenEqualsStarted
        }
    ];

    invariants.forEach((invariant) => {
        const failedSamples = filterFalse(samples, invariant.holds);
        const errRate = failedSamples.length / samples.length;
        // The message is built lazily so that samples are only serialized on failure.
        assertAlways.lte(errRate, maxErrPct, () => {
            const failedSamplesStr = failedSamples.map(tojsononeline).join("\n");
            return "'" + invariant.name + "' invariant violated. Failed samples: " +
                failedSamplesStr;
        });
    });
}
95+
96+
/**
 * Workload teardown: verifies the serverStatus invariants once more and then delegates to the
 * teardown of the workload being extended, forwarding the receiver and all arguments.
 */
$config.teardown = function(db, collName, cluster) {
    // Check the server-wide invariants one last time, with only a single sample, since all user
    // operations should have finished.
    checkServerStatusInvariants(db, 1);
    $super.teardown.apply(this, arguments);
};
102+
103+
/**
 * FSM state: checks the server-wide serverStatus invariants and then, for every operation
 * returned by currentOp that has a 'transaction' field, checks that
 * timeActiveMicros + timeInactiveMicros = timeOpenMicros.
 */
$config.states.checkInvariants = function checkInvariants(db, collName) {
    // Check server-wide invariants using 100 samples. This sample size is deemed big enough to
    // account for transient inconsistencies, which we assume are rare.
    const nSamples = 100;
    checkServerStatusInvariants(db, nSamples);

    // Check currentOp metrics invariants for all running transactions. These timing related
    // invariants are expected to always hold.
    const currentOp = db.currentOp({"transaction": {$exists: true}});
    currentOp.inprog.forEach((op) => {
        const txnStats = op.transaction;
        // Convert to Number before adding so the sum and the comparison are plain numeric.
        const timeActive = Number(txnStats["timeActiveMicros"]);
        const timeInactive = Number(txnStats["timeInactiveMicros"]);
        const timeOpen = Number(txnStats["timeOpenMicros"]);
        // The failing transaction's stats are only serialized if the assertion fails.
        assertAlways.eq(timeActive + timeInactive, timeOpen, () => tojson(txnStats));
    });
};
122+
123+
// State machine for this workload: mostly run 'update', occasionally (10% of the time) check
// the metrics invariants, and always go back to 'update' after a check.
$config.transitions = {
    init: {update: 0.9, checkInvariants: 0.1},
    update: {update: 0.9, checkInvariants: 0.1},
    checkInvariants: {update: 1.0}
};
128+
129+
return $config;
130+
});

jstests/noPassthrough/libs/backup_restore.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ var BackupRestoreTest = function(options) {
149149
'multi_statement_transaction_atomicity_isolation_multi_db.js',
150150
'multi_statement_transaction_atomicity_isolation_repeated_reads.js',
151151
'multi_statement_transaction_kill_sessions_atomicity_isolation.js',
152+
'multi_statement_transaction_atomicity_isolation_metrics_test.js',
152153
'multi_statement_transaction_simple.js',
153154
'multi_statement_transaction_simple_repeated_reads.js',
154155
'reindex_background.js',

0 commit comments

Comments
 (0)