Skip to content

Commit e56bbfc

Browse files
author
Mikael Ronström
committed
BUG#30947735: NODES ARE CRASHING DURING DISK DATA OBJECT CREATION
Approved-by: Frazer Clement <[email protected]> Extent pages are placed into the list of SL_LOCKED pages before the data file is ready and before the mutexes are initialised. This is a problem because we crash when trying to lock these extent pages. Such pages are not yet ready for checkpointing, so we simply skip them during checkpointing until the data file is ready.
1 parent 450c1e6 commit e56bbfc

File tree

3 files changed

+62
-24
lines changed

3 files changed

+62
-24
lines changed

storage/ndb/src/kernel/blocks/pgman.cpp

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3256,33 +3256,44 @@ Pgman::process_lcp_locked(Signal* signal, Ptr<Page_entry> ptr)
32563256
{
32573257
Tablespace_client tsman(signal, this, c_tsman, 0, 0, 0, 0);
32583258
jam();
3259-
tsman.lock_extent_page(ptr.p->m_file_no, ptr.p->m_page_no);
3260-
if ((ptr.p->m_state & Page_entry::DIRTY) &&
3261-
!(ptr.p->m_state & Page_entry::PAGEOUT))
3259+
bool is_file_ready = tsman.is_datafile_ready(ptr.p->m_file_no);
3260+
if (is_file_ready)
32623261
{
3263-
jam();
3264-
Ptr<GlobalPage> org, copy;
3265-
ndbrequire(m_global_page_pool.seize(copy));
3266-
m_global_page_pool.getPtr(org, ptr.p->m_real_page_i);
3267-
memcpy(copy.p, org.p, sizeof(GlobalPage));
3268-
ptr.p->m_copy_page_i = copy.i;
3269-
3270-
ptr.p->m_state |= Page_entry::LCP;
3262+
/**
3263+
* An extent page is placed into SL_LOCKED pages before the
3264+
* data file is ready for use. This means that we haven't even
3265+
* initialised the mutexes yet and also not initialised all
3266+
* the extent pages. Avoid checkpointing those pages until
3267+
* the data file is ready.
3268+
*/
3269+
tsman.lock_extent_page(ptr.p->m_file_no, ptr.p->m_page_no);
3270+
if ((ptr.p->m_state & Page_entry::DIRTY) &&
3271+
!(ptr.p->m_state & Page_entry::PAGEOUT))
3272+
{
3273+
jam();
3274+
Ptr<GlobalPage> org, copy;
3275+
ndbrequire(m_global_page_pool.seize(copy));
3276+
m_global_page_pool.getPtr(org, ptr.p->m_real_page_i);
3277+
memcpy(copy.p, org.p, sizeof(GlobalPage));
3278+
ptr.p->m_copy_page_i = copy.i;
32713279

3272-
DEB_PGMAN_PAGE(("(%u)pageout():extent, page(%u,%u):%u:%x",
3273-
instance(),
3274-
ptr.p->m_file_no,
3275-
ptr.p->m_page_no,
3276-
ptr.i,
3277-
(unsigned int)ptr.p->m_state));
3280+
ptr.p->m_state |= Page_entry::LCP;
32783281

3279-
pageout(signal, ptr);
3280-
m_lcp_outstanding++;
3281-
m_current_lcp_pageouts++;
3282-
m_available_lcp_pageouts_used++;
3283-
break_flag = true;
3282+
DEB_PGMAN_PAGE(("(%u)pageout():extent, page(%u,%u):%u:%x",
3283+
instance(),
3284+
ptr.p->m_file_no,
3285+
ptr.p->m_page_no,
3286+
ptr.i,
3287+
(unsigned int)ptr.p->m_state));
3288+
3289+
pageout(signal, ptr);
3290+
m_lcp_outstanding++;
3291+
m_current_lcp_pageouts++;
3292+
m_available_lcp_pageouts_used++;
3293+
break_flag = true;
3294+
}
3295+
tsman.unlock_extent_page(ptr.p->m_file_no, ptr.p->m_page_no);
32843296
}
3285-
tsman.unlock_extent_page(ptr.p->m_file_no, ptr.p->m_page_no);
32863297
}
32873298

32883299
Page_sublist& pl = *m_page_sublist[Page_entry::SL_LOCKED];

storage/ndb/src/kernel/blocks/tsman.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3428,6 +3428,20 @@ Tsman::client_unlock()
34283428
}
34293429
}
34303430

3431+
/**
 * Report whether the data file identified by file_no is ready for use.
 *
 * The file is looked up in m_file_hash by its file number. The result is
 * false when no such file is found, or when the file is found but still
 * in state Datafile::FS_CREATING; it is true for a found file in any
 * other state.
 *
 * @param file_no  File number of the data file to check.
 * @return true if the file is known and not in FS_CREATING, else false.
 */
bool Tsman::is_datafile_ready(Uint32 file_no)
3432+
{
3433+
Ptr<Datafile> file_ptr;
3434+
Datafile file_key;  // search key: only m_file_no is filled in
3435+
file_key.m_file_no = file_no;
3436+
if (m_file_hash.find(file_ptr, file_key))
3437+
{
3438+
if (file_ptr.p->m_state == Datafile::FS_CREATING)
3439+
return false;  // file exists but is still being created
3440+
return true;
3441+
}
3442+
return false;  // no data file with this file number in the hash
3443+
}
3444+
34313445
void Tsman::lock_extent_page(Uint32 file_no, Uint32 page_no)
34323446
{
34333447
if (isNdbMtLqh())

storage/ndb/src/kernel/blocks/tsman.hpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
2+
Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License, version 2.0,
@@ -242,6 +242,7 @@ class Tsman : public SimulatedBlock
242242
void client_unlock();
243243
void client_lock(Uint32 instance);
244244
void client_unlock(Uint32 instance);
245+
bool is_datafile_ready(Uint32 file_no);
245246
void lock_extent_page(Uint32 file_no, Uint32 page_no);
246247
void unlock_extent_page(Uint32 file_no, Uint32 page_no);
247248
void lock_extent_page(Datafile*, Uint32 page_no);
@@ -413,6 +414,11 @@ class Tablespace_client
413414
*/
414415
int unmap_page(Local_key*, Uint32 bits);
415416

417+
/**
418+
* Check if datafile is ready for checkpoints.
419+
*/
420+
bool is_datafile_ready(Uint32 file_no);
421+
416422
/**
417423
* Lock/Unlock extent page to ensure that access to this extent
418424
* page is serialised.
@@ -553,6 +559,13 @@ Tablespace_client::get_page_free_bits(Local_key *key,
553559
return m_tsman->get_page_free_bits(m_signal, key, uncommited, commited);
554560
}
555561

562+
/**
 * Client-side wrapper: forwards the readiness check for the data file
 * file_no to the Tsman block held in m_tsman and returns its answer
 * unchanged.
 */
inline
563+
bool
564+
Tablespace_client::is_datafile_ready(Uint32 file_no)
565+
{
566+
return m_tsman->is_datafile_ready(file_no);
567+
}
568+
556569
inline
557570
void
558571
Tablespace_client::lock_extent_page(Uint32 file_no, Uint32 page_no)

0 commit comments

Comments
 (0)