Skip to content

Commit e5a3b56

Browse files
committed
Bug#36363119 Post restart ndbmtd is not able to come up
Fix a regression in LQH operation pool iteration which could cause some operations to be missed during iteration, resulting in orphaned operations holding locks and affecting transaction processing and node recovery. A new MTR test (ndb_tctakeover2) has been added to cover this specific scenario. Change-Id: I35867a4e2b5d5a13292824058a9bdb205b81831d
1 parent 38b7bb7 commit e5a3b56

File tree

5 files changed

+242
-61
lines changed

5 files changed

+242
-61
lines changed
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
Create table with non standard partitioning
2+
allowing colocation of operations in a single LDM
3+
create table t1 (a int, b int, c int, primary key(a,b)) engine=ndb partition by key(a);
4+
Insert some rows
5+
All will be on the same partition, LDM instance, nodes
6+
insert into t1 values (1,1,1);
7+
insert into t1 select 1, b+1, 1 from t1;
8+
insert into t1 select 1, b+2, 1 from t1;
9+
insert into t1 select 1, b+4, 1 from t1;
10+
insert into t1 select 1, b+8, 1 from t1;
11+
insert into t1 select 1, b+16, 1 from t1;
12+
insert into t1 select 1, b+32, 1 from t1;
13+
insert into t1 select 1, b+64, 1 from t1;
14+
insert into t1 select 1, b+128, 1 from t1;
15+
insert into t1 select 1, b+256, 1 from t1;
16+
Check LQH operation entry size and records per page.
17+
Usable words on page are 32768 - 128
18+
Extra pages is 0
19+
Start a transaction with a number of operations
20+
100 ops makes it easier to identify in case of noise
21+
begin;
22+
update t1 set c=c+1 limit 100;
23+
Determine TC
24+
Determine other node
25+
Determine target number of ops
26+
as multiple of records per page
27+
With optional extra pages to test dynamic sub pool limit
28+
Check targetops not too small
29+
enoughTargetOps
30+
1
31+
Subtract 200 ops
32+
100 for those already created
33+
100 reserved for DBUTIL usage
34+
Consume ops up to target
35+
Restart TC node
36+
Wait for not started state
37+
Check no operations from failed TC remain (must be none)
38+
select * from ndbinfo.cluster_operations where tc_node_id=2;;
39+
node_id block_instance transid operation_type state tableid fragmentid client_node_id client_block_ref tc_node_id tc_block_no tc_block_instance
40+
Start TC node
41+
Wait for all started
42+
OK
43+
rollback;
44+
ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from NDBCLUSTER
45+
Extra pages is 1
46+
Start a transaction with a number of operations
47+
100 ops makes it easier to identify in case of noise
48+
begin;
49+
update t1 set c=c+1 limit 100;
50+
Determine TC
51+
Determine other node
52+
Determine target number of ops
53+
as multiple of records per page
54+
With optional extra pages to test dynamic sub pool limit
55+
Check targetops not too small
56+
enoughTargetOps
57+
1
58+
Subtract 200 ops
59+
100 for those already created
60+
100 reserved for DBUTIL usage
61+
Consume ops up to target
62+
Restart TC node
63+
Wait for not started state
64+
Check no operations from failed TC remain (must be none)
65+
select * from ndbinfo.cluster_operations where tc_node_id=1;;
66+
node_id block_instance transid operation_type state tableid fragmentid client_node_id client_block_ref tc_node_id tc_block_no tc_block_instance
67+
Start TC node
68+
Wait for all started
69+
OK
70+
rollback;
71+
ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from NDBCLUSTER
72+
drop table t1;
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
--source include/have_ndb.inc
2+
3+
--echo Create table with non standard partitioning
4+
--echo allowing colocation of operations in a single LDM
5+
create table t1 (a int, b int, c int, primary key(a,b)) engine=ndb partition by key(a);
6+
7+
--echo Insert some rows
8+
--echo All will be on the same partition, LDM instance, nodes
9+
insert into t1 values (1,1,1);
10+
insert into t1 select 1, b+1, 1 from t1;
11+
insert into t1 select 1, b+2, 1 from t1;
12+
insert into t1 select 1, b+4, 1 from t1;
13+
insert into t1 select 1, b+8, 1 from t1;
14+
insert into t1 select 1, b+16, 1 from t1;
15+
insert into t1 select 1, b+32, 1 from t1;
16+
insert into t1 select 1, b+64, 1 from t1;
17+
insert into t1 select 1, b+128, 1 from t1;
18+
insert into t1 select 1, b+256, 1 from t1;
19+
20+
--echo Check LQH operation entry size and records per page.
21+
--echo Usable words on page are 32768 - 128
22+
let $rpp = query_get_value("select floor((32768-128)/entry_size) as recs_per_page from ndbinfo.ndb\\\$pools where pool_name='LQH Operation Record' LIMIT 1", recs_per_page, 1);
23+
#--echo rpp is $rpp
24+
25+
let $extra_pages=0;
26+
27+
while ($extra_pages < 2)
28+
{
29+
--echo Extra pages is $extra_pages
30+
31+
--echo Start a transaction with a number of operations
32+
--echo 100 ops makes it easier to identify in case of noise
33+
begin;
34+
update t1 set c=c+1 limit 100;
35+
36+
--echo Determine TC
37+
let $tcnode= query_get_value('select node_id from ndbinfo.cluster_transactions where count_operations=100', node_id, 1);
38+
#--echo tcnode is $tcnode
39+
40+
--echo Determine other node
41+
let $survnode= query_get_value('select distinct(node_id) from ndbinfo.cluster_operations where node_id!=$tcnode', node_id, 1);
42+
#--echo Non TC node with operations is $survnode
43+
44+
--echo Determine target number of ops
45+
--echo as multiple of records per page
46+
--echo With optional extra pages to test dynamic sub pool limit
47+
let $targetops= query_get_value("select max(total), $extra_pages * $rpp, $rpp * ($extra_pages + floor((max(total) + $rpp - 1) / $rpp)) as target from ndbinfo.ndb\\\$pools where pool_name='LQH Operation Record' and node_id=$survnode", target, 1);
48+
#--echo Target operations $targetops
49+
50+
--echo Check targetops not too small
51+
--disable_query_log
52+
--eval select $targetops > 200 as enoughTargetOps;
53+
--enable_query_log
54+
55+
--echo Subtract 200 ops
56+
--echo 100 for those already created
57+
--echo 100 reserved for DBUTIL usage
58+
let $extraops = query_get_value("select $targetops - 200 as extraops", extraops, 1);
59+
60+
#--echo ExtraOps is $extraops
61+
62+
--echo Consume ops up to target
63+
64+
--disable_query_log
65+
while ($extraops > 0)
66+
{
67+
update t1 set c=c+1 limit 1;
68+
dec $extraops;
69+
}
70+
--enable_query_log
71+
72+
#select node_id, block_instance, count(1) from ndbinfo.cluster_operations group by node_id, block_instance;
73+
#select * from ndbinfo.ndb$pools where pool_name like "LQH Operation Records";
74+
#select node_id, block_instance, min(user_ptr), max(user_ptr) from ndbinfo.ndb$acc_operations group by node_id, block_instance;
75+
76+
--echo Restart TC node
77+
exec $NDB_MGM -e "$tcnode restart -a -n" >> $NDB_TOOLS_OUTPUT;
78+
79+
--echo Wait for not started state
80+
exec $NDB_WAITER --not-started -w $tcnode >> $NDB_TOOLS_OUTPUT;
81+
82+
--echo Check no operations from failed TC remain (must be none)
83+
--eval select * from ndbinfo.cluster_operations where tc_node_id=$tcnode;
84+
85+
--echo Start TC node
86+
exec $NDB_MGM -e "$tcnode start" >> $NDB_TOOLS_OUTPUT;
87+
88+
--echo Wait for all started
89+
exec $NDB_WAITER >> $NDB_TOOLS_OUTPUT;
90+
91+
--echo OK
92+
93+
--error 1297
94+
rollback;
95+
96+
--inc $extra_pages
97+
}
98+
99+
drop table t1;
100+
101+
remove_file $NDB_TOOLS_OUTPUT;
102+
103+
104+

storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp

Lines changed: 64 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -13027,62 +13027,65 @@ void Dblqh::lqhTransNextLab(Signal *signal, TcNodeFailRecordPtr tcNodeFailPtr) {
1302713027
TcConnectionrecPtr tcConnectptr;
1302813028
tcConnectptr.i = tcNodeFailPtr.p->tcRecNow;
1302913029
for (Uint32 i = 0; i < 100; i++) {
13030-
bool found = getNextTcConRec(tcNodeFailPtr.p->tcRecNow, tcConnectptr, 10);
13031-
if (tcNodeFailPtr.p->tcRecNow != RNIL && !found) {
13032-
/**
13033-
* We scanned without finding any records for a long
13034-
* time, thus we will treat this as looping 10 times
13035-
* in this loop.
13036-
*/
13037-
jam();
13038-
i += 10;
13039-
continue;
13040-
} else if (tcNodeFailPtr.p->tcRecNow == RNIL) {
13041-
jam();
13042-
/**
13043-
* Finished with scanning operation record
13044-
*
13045-
* now scan markers
13046-
*/
13030+
if (!getNextTcConRec(tcNodeFailPtr.p->tcRecNow, tcConnectptr, 10)) {
13031+
if (tcNodeFailPtr.p->tcRecNow != RNIL) {
13032+
/**
13033+
* We scanned without finding any records for a long
13034+
* time, thus we will treat this as looping 10 times
13035+
* in this loop.
13036+
*/
13037+
jam();
13038+
i += 10;
13039+
continue;
13040+
} else {
13041+
jam();
13042+
/**
13043+
* Finished with scanning operation record
13044+
*
13045+
* now scan markers
13046+
*/
1304713047
#ifdef ERROR_INSERT
13048-
if (ERROR_INSERTED(5061)) {
13049-
CLEAR_ERROR_INSERT_VALUE;
13050-
for (Uint32 i = 0; i < cnoOfNodes; i++) {
13051-
Uint32 node = cnodeData[i];
13052-
if (node != getOwnNodeId() && cnodeStatus[i] == ZNODE_UP) {
13053-
g_eventLogger->info("clearing ERROR_INSERT in LQH:%u", node);
13054-
signal->theData[0] = 0;
13055-
sendSignal(numberToRef(getDBLQH(), node), GSN_NDB_TAMPER, signal, 1,
13056-
JBB);
13048+
if (ERROR_INSERTED(5061)) {
13049+
CLEAR_ERROR_INSERT_VALUE;
13050+
for (Uint32 i = 0; i < cnoOfNodes; i++) {
13051+
Uint32 node = cnodeData[i];
13052+
if (node != getOwnNodeId() && cnodeStatus[i] == ZNODE_UP) {
13053+
g_eventLogger->info("clearing ERROR_INSERT in LQH:%u", node);
13054+
signal->theData[0] = 0;
13055+
sendSignal(numberToRef(getDBLQH(), node), GSN_NDB_TAMPER, signal,
13056+
1, JBB);
13057+
}
1305713058
}
13059+
13060+
signal->theData[0] = ZSCAN_MARKERS;
13061+
signal->theData[1] = tcNodeFailPtr.i;
13062+
signal->theData[2] = 0;
13063+
sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 5000, 3);
13064+
return;
1305813065
}
1305913066

13060-
signal->theData[0] = ZSCAN_MARKERS;
13061-
signal->theData[1] = tcNodeFailPtr.i;
13062-
signal->theData[2] = 0;
13063-
sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 5000, 3);
13067+
if (ERROR_INSERTED(5050)) {
13068+
g_eventLogger->info(
13069+
"send ZSCAN_MARKERS with 5s delay and killing master: %u",
13070+
c_master_node_id);
13071+
CLEAR_ERROR_INSERT_VALUE;
13072+
signal->theData[0] = ZSCAN_MARKERS;
13073+
signal->theData[1] = tcNodeFailPtr.i;
13074+
signal->theData[2] = 0;
13075+
sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 5000, 3);
13076+
13077+
signal->theData[0] = 9999;
13078+
sendSignal(numberToRef(CMVMI, c_error_insert_extra), GSN_NDB_TAMPER,
13079+
signal, 1, JBB);
13080+
return;
13081+
}
13082+
#endif
13083+
scanMarkers(signal, tcNodeFailPtr.i, 0);
1306413084
return;
13065-
}
13066-
13067-
if (ERROR_INSERTED(5050)) {
13068-
g_eventLogger->info(
13069-
"send ZSCAN_MARKERS with 5s delay and killing master: %u",
13070-
c_master_node_id);
13071-
CLEAR_ERROR_INSERT_VALUE;
13072-
signal->theData[0] = ZSCAN_MARKERS;
13073-
signal->theData[1] = tcNodeFailPtr.i;
13074-
signal->theData[2] = 0;
13075-
sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 5000, 3);
13085+
} // if
13086+
} // if (!getNextTcConRec())
1307613087

13077-
signal->theData[0] = 9999;
13078-
sendSignal(numberToRef(CMVMI, c_error_insert_extra), GSN_NDB_TAMPER,
13079-
signal, 1, JBB);
13080-
return;
13081-
}
13082-
#endif
13083-
scanMarkers(signal, tcNodeFailPtr.i, 0);
13084-
return;
13085-
} // if
13088+
/* Found an operation record */
1308613089
if (tcConnectptr.p->transactionState != TcConnectionrec::IDLE) {
1308713090
if (tcConnectptr.p->transactionState !=
1308813091
TcConnectionrec::TC_NOT_CONNECTED) {
@@ -19200,15 +19203,18 @@ void Dblqh::scanTcConnectLab(Signal *signal, Uint32 tstartTcConnect,
1920019203
TcConnectionrecPtr tcConnectptr;
1920119204
Uint32 next = tstartTcConnect;
1920219205
for (Uint32 i = 0; i < 200; i++) {
19203-
bool found = getNextTcConRec(next, tcConnectptr, 10);
19204-
if (next != RNIL && !found) {
19205-
jam();
19206-
i += 10;
19207-
continue;
19208-
} else if (next == RNIL) {
19209-
jam();
19210-
break;
19206+
if (!getNextTcConRec(next, tcConnectptr, 10)) {
19207+
if (next != RNIL) {
19208+
jam();
19209+
i += 10;
19210+
continue;
19211+
} else {
19212+
/* Scan done */
19213+
jam();
19214+
break;
19215+
}
1921119216
}
19217+
/* Examine next record */
1921219218
if (tcConnectptr.p->transactionState != TcConnectionrec::IDLE) {
1921319219
switch (tcConnectptr.p->logWriteState) {
1921419220
case TcConnectionrec::NOT_WRITTEN:

storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3911,8 +3911,6 @@ class Dbtup : public SimulatedBlock {
39113911

39123912
private:
39133913
void release_c_free_scan_lock();
3914-
bool getNextTcConRec(Uint32 &next, OperationrecPtr &opPtr, Uint32 max_loops);
3915-
39163914
void checkPoolShrinkNeed(Uint32 pool_index,
39173915
const TransientFastSlotPool &pool);
39183916
void sendPoolShrink(Uint32 pool_index);

storage/ndb/src/kernel/vm/ComposedSlotPool.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,8 @@ inline void ComposedSlotPool<Pool1, Pool2>::init(Uint32 type_id,
241241
const Uint32 req_recs = *min_recs;
242242
Uint32 pool1_recs = req_recs;
243243
m_pool1.init(type_id, slot_size, &pool1_recs, pool_ctx);
244-
Uint32 pool2_recs = req_recs - pool1_recs;
244+
// If pool1 failed to allocate all, spill remaining to pool2
245+
Uint32 pool2_recs = (pool1_recs < req_recs) ? req_recs - pool1_recs : 0;
245246
m_pool2.init(type_id, slot_size, &pool2_recs, pool_ctx);
246247
*min_recs = pool1_recs + pool2_recs;
247248

0 commit comments

Comments
 (0)