Skip to content

Commit 57ebe2d

Browse files
committed
Bug#22320066 EVENTS_STATEMENTS_SUMMARY_BY_DIGEST: NO UNIQUE ROWS BY
DIGEST/SCHEMA_NAME Fix for 5.6 (backported from 5.7 and 8.0) Before this fix, table events_statements_summary_by_digest exposed many rows for the same statement digest and schema, breaking the expected uniqueness of digests. The root cause is in function find_or_create_digest(), which does not handle duplicate inserts in the LF_HASH properly. When two different sessions execute the same statement for the first time, each session creates an entry for the statement digest. In this case, the index is maintained properly with only one entry, but the table data itself still contained duplicated rows, orphan. The fix is to: - use a pfs_lock for a statement digest record - free the duplicate record when duplication is detected in the unique index - loop in the entire buffer to find an empty record, so that duplicate entries do not create holes and do not cause leaks - honor the pfs_lock when exposing the data. With this fix, the allocation pattern is similar to other instrumentations, like the mutex instances for example. Tested manually with debug code added to expose the race conditions. Not testable by MTR scripts.
1 parent ea42e88 commit 57ebe2d

File tree

4 files changed

+102
-57
lines changed

4 files changed

+102
-57
lines changed

storage/perfschema/pfs_digest.cc

Lines changed: 71 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -45,7 +45,7 @@ bool flag_statements_digest= true;
4545
Current index in Stat array where new record is to be inserted.
4646
index 0 is reserved for "all else" case when entire array is full.
4747
*/
48-
volatile uint32 digest_index;
48+
volatile uint32 PFS_ALIGNED digest_monotonic_index;
4949
bool digest_full= false;
5050

5151
LF_HASH digest_hash;
@@ -63,7 +63,7 @@ int init_digest(const PFS_global_param *param)
6363
*/
6464
digest_max= param->m_digest_sizing;
6565
digest_lost= 0;
66-
digest_index= 1;
66+
PFS_atomic::store_u32(& digest_monotonic_index, 1);
6767
digest_full= false;
6868

6969
if (digest_max == 0)
@@ -105,6 +105,9 @@ int init_digest(const PFS_global_param *param)
105105
+ index * pfs_max_digest_length, pfs_max_digest_length);
106106
}
107107

108+
/* Set record[0] as allocated. */
109+
statements_digest_stat_array[0].m_lock.set_allocated();
110+
108111
return 0;
109112
}
110113

@@ -207,9 +210,10 @@ find_or_create_digest(PFS_thread *thread,
207210
memcpy(hash_key.m_schema_name, schema_name, schema_name_length);
208211

209212
int res;
210-
ulong safe_index;
211213
uint retry_count= 0;
212214
const uint retry_max= 3;
215+
size_t safe_index;
216+
size_t attempts= 0;
213217
PFS_statements_digest_stat **entry;
214218
PFS_statements_digest_stat *pfs= NULL;
215219

@@ -245,55 +249,70 @@ find_or_create_digest(PFS_thread *thread,
245249
return & pfs->m_stat;
246250
}
247251

248-
safe_index= PFS_atomic::add_u32(& digest_index, 1);
249-
if (safe_index >= digest_max)
252+
while (++attempts <= digest_max)
250253
{
251-
/* The digest array is now full. */
252-
digest_full= true;
253-
pfs= &statements_digest_stat_array[0];
254-
255-
if (pfs->m_first_seen == 0)
256-
pfs->m_first_seen= now;
257-
pfs->m_last_seen= now;
258-
return & pfs->m_stat;
259-
}
260-
261-
/* Add a new record in digest stat array. */
262-
pfs= &statements_digest_stat_array[safe_index];
263-
264-
/* Copy digest hash/LF Hash search key. */
265-
memcpy(& pfs->m_digest_key, &hash_key, sizeof(PFS_digest_key));
266-
267-
/*
268-
Copy digest storage to statement_digest_stat_array so that it could be
269-
used later to generate digest text.
270-
*/
271-
pfs->m_digest_storage.copy(digest_storage);
272-
273-
pfs->m_first_seen= now;
274-
pfs->m_last_seen= now;
254+
safe_index= PFS_atomic::add_u32(& digest_monotonic_index, 1) % digest_max;
255+
if (safe_index == 0)
256+
{
257+
/* Record [0] is reserved. */
258+
safe_index= 1;
259+
}
275260

276-
res= lf_hash_insert(&digest_hash, pins, &pfs);
277-
if (likely(res == 0))
278-
{
279-
return & pfs->m_stat;
280-
}
261+
/* Add a new record in digest stat array. */
262+
pfs= &statements_digest_stat_array[safe_index];
281263

282-
if (res > 0)
283-
{
284-
/* Duplicate insert by another thread */
285-
if (++retry_count > retry_max)
264+
if (pfs->m_lock.is_free())
286265
{
287-
/* Avoid infinite loops */
288-
digest_lost++;
289-
return NULL;
266+
if (pfs->m_lock.free_to_dirty())
267+
{
268+
/* Copy digest hash/LF Hash search key. */
269+
memcpy(& pfs->m_digest_key, &hash_key, sizeof(PFS_digest_key));
270+
271+
/*
272+
Copy digest storage to statement_digest_stat_array so that it could be
273+
used later to generate digest text.
274+
*/
275+
pfs->m_digest_storage.copy(digest_storage);
276+
277+
pfs->m_first_seen= now;
278+
pfs->m_last_seen= now;
279+
280+
res= lf_hash_insert(&digest_hash, pins, &pfs);
281+
if (likely(res == 0))
282+
{
283+
pfs->m_lock.dirty_to_allocated();
284+
return & pfs->m_stat;
285+
}
286+
287+
pfs->m_lock.dirty_to_free();
288+
289+
if (res > 0)
290+
{
291+
/* Duplicate insert by another thread */
292+
if (++retry_count > retry_max)
293+
{
294+
/* Avoid infinite loops */
295+
digest_lost++;
296+
return NULL;
297+
}
298+
goto search;
299+
}
300+
301+
/* OOM in lf_hash_insert */
302+
digest_lost++;
303+
return NULL;
304+
}
290305
}
291-
goto search;
292306
}
293307

294-
/* OOM in lf_hash_insert */
295-
digest_lost++;
296-
return NULL;
308+
/* The digest array is now full. */
309+
digest_full= true;
310+
pfs= &statements_digest_stat_array[0];
311+
312+
if (pfs->m_first_seen == 0)
313+
pfs->m_first_seen= now;
314+
pfs->m_last_seen= now;
315+
return & pfs->m_stat;
297316
}
298317

299318
void purge_digest(PFS_thread* thread, PFS_digest_key *hash_key)
@@ -320,10 +339,12 @@ void purge_digest(PFS_thread* thread, PFS_digest_key *hash_key)
320339

321340
void PFS_statements_digest_stat::reset_data(unsigned char *token_array, uint length)
322341
{
342+
m_lock.set_dirty();
323343
m_digest_storage.reset(token_array, length);
324344
m_stat.reset();
325345
m_first_seen= 0;
326346
m_last_seen= 0;
347+
m_lock.dirty_to_free();
327348
}
328349

329350
void PFS_statements_digest_stat::reset_index(PFS_thread *thread)
@@ -351,11 +372,14 @@ void reset_esms_by_digest()
351372
statements_digest_stat_array[index].reset_data(statements_digest_token_array + index * pfs_max_digest_length, pfs_max_digest_length);
352373
}
353374

375+
/* Mark record[0] as allocated again. */
376+
statements_digest_stat_array[0].m_lock.set_allocated();
377+
354378
/*
355379
Reset index which indicates where the next calculated digest information
356380
to be inserted in statements_digest_stat_array.
357381
*/
358-
digest_index= 1;
382+
PFS_atomic::store_u32(& digest_monotonic_index, 1);
359383
digest_full= false;
360384
}
361385

storage/perfschema/pfs_digest.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -44,6 +44,9 @@ struct PFS_digest_key
4444
/** A statement digest stat record. */
4545
struct PFS_ALIGNED PFS_statements_digest_stat
4646
{
47+
/** Internal lock. */
48+
pfs_lock m_lock;
49+
4750
/** Digest Schema + MD5 Hash. */
4851
PFS_digest_key m_digest_key;
4952

storage/perfschema/pfs_lock.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -155,6 +155,18 @@ struct pfs_lock
155155
PFS_atomic::store_u32(&m_version_state, new_val);
156156
}
157157

158+
/**
159+
Initialize a lock to dirty.
160+
*/
161+
void set_dirty(void)
162+
{
163+
/* Do not set the version to 0, read the previous value. */
164+
uint32 copy= PFS_atomic::load_u32(&m_version_state);
165+
/* Increment the version, set the DIRTY state */
166+
uint32 new_val= (copy & VERSION_MASK) + VERSION_INC + PFS_LOCK_DIRTY;
167+
PFS_atomic::store_u32(&m_version_state, new_val);
168+
}
169+
158170
/**
159171
Execute a dirty to free transition.
160172
This transition should be executed by the writer that owns the record.

storage/perfschema/table_esms_by_digest.cc

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -238,11 +238,14 @@ int table_esms_by_digest::rnd_next(void)
238238
m_pos.next())
239239
{
240240
digest_stat= &statements_digest_stat_array[m_pos.m_index];
241-
if (digest_stat->m_first_seen != 0)
241+
if (digest_stat->m_lock.is_populated())
242242
{
243-
make_row(digest_stat);
244-
m_next_pos.set_after(&m_pos);
245-
return 0;
243+
if (digest_stat->m_first_seen != 0)
244+
{
245+
make_row(digest_stat);
246+
m_next_pos.set_after(&m_pos);
247+
return 0;
248+
}
246249
}
247250
}
248251

@@ -260,10 +263,13 @@ table_esms_by_digest::rnd_pos(const void *pos)
260263
set_position(pos);
261264
digest_stat= &statements_digest_stat_array[m_pos.m_index];
262265

263-
if (digest_stat->m_first_seen != 0)
266+
if (digest_stat->m_lock.is_populated())
264267
{
265-
make_row(digest_stat);
266-
return 0;
268+
if (digest_stat->m_first_seen != 0)
269+
{
270+
make_row(digest_stat);
271+
return 0;
272+
}
267273
}
268274

269275
return HA_ERR_RECORD_DELETED;

0 commit comments

Comments
 (0)