Skip to content

Commit ca43fae

Browse files
author
Tobias Christiani
committed
WL#15786 Automatically updated histograms
This patch adds automatically updated histograms to the server. Individual histograms can be set to be updated either automatically or manually by specifying a new trailing option, [{AUTO|MANUAL} UPDATE], on the UPDATE HISTOGRAM clause of ANALYZE TABLE. The default setting is MANUAL UPDATE. Automatic updates of histograms take place as follows: (1) When ANALYZE TABLE <table> is called, any automatically updated histograms on <table> are updated (re-created with the same settings from a fresh sample of data from the table) and made available to the optimizer. (2) For InnoDB tables, when automatic recalculation of persistent statistics takes place, any automatically updated histograms on the table are also updated and made available to the optimizer. Change-Id: I4755c80a896962a6094a6bf5140477927d3a2e31
1 parent 90e028a commit ca43fae

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+2144
-908
lines changed

mysql-test/include/store_histogram_and_check.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
eval ANALYZE TABLE $tbl_name UPDATE HISTOGRAM ON $col_name WITH $buckets BUCKETS;
1+
eval ANALYZE TABLE $tbl_name UPDATE HISTOGRAM ON $col_name WITH $buckets BUCKETS $update UPDATE;
22
let $result1 = `SELECT JSON_REMOVE(HISTOGRAM, '$."last-updated"') AS HISTOGRAM FROM information_schema.COLUMN_STATISTICS WHERE TABLE_NAME="$tbl_name" AND COLUMN_NAME="$col_name";`;
33
let $selectivity1 = `EXPLAIN SELECT * FROM $tbl_name WHERE $col_name < $comparison_value;`;
44
eval ANALYZE TABLE $tbl_name UPDATE HISTOGRAM ON $col_name USING DATA '$json_data';

mysql-test/include/subquery.inc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3400,8 +3400,10 @@ show status like '%Handler_read_rnd_next';
34003400
delete from t2;
34013401
insert into t2 values (NULL, 0),(NULL, 0), (NULL, 0), (NULL, 0);
34023402

3403-
flush status;
3403+
# ANALYZE TABLE makes handler calls when e.g. updating histograms,
3404+
# so we FLUSH STATUS after calling ANALYZE TABLE.
34043405
ANALYZE TABLE t1, t2;
3406+
FLUSH STATUS;
34053407
select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2;
34063408
--skip_if_hypergraph # Depends on the query plan.
34073409
show status like '%Handler_read%';

mysql-test/r/alter_table.result

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3527,7 +3527,7 @@ SELECT schema_name, table_name, column_name,
35273527
JSON_REMOVE(histogram, '$."last-updated"')
35283528
FROM information_schema.COLUMN_STATISTICS;
35293529
SCHEMA_NAME TABLE_NAME COLUMN_NAME JSON_REMOVE(histogram, '$."last-updated"')
3530-
test foo col1 {"buckets": [[1, 0.5], [2, 1.0]], "data-type": "int", "null-values": 0.0, "collation-id": 8, "sampling-rate": 1.0, "histogram-type": "singleton", "number-of-buckets-specified": 10}
3530+
test foo col1 {"buckets": [[1, 0.5], [2, 1.0]], "data-type": "int", "auto-update": false, "null-values": 0.0, "collation-id": 8, "sampling-rate": 1.0, "histogram-type": "singleton", "number-of-buckets-specified": 10}
35313531
ALTER TABLE foo RENAME COLUMN col1 TO col2;
35323532
SELECT schema_name, table_name, column_name,
35333533
JSON_REMOVE(histogram, '$."last-updated"')

mysql-test/r/dd_schema_definition_debug.result

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ SET debug = '+d,skip_dd_table_access_check';
165165
########################################################################
166166
# The actual DD version stored on disk.
167167
########################################################################
168-
DD_VERSION=80200
168+
DD_VERSION=80300
169169
########################################################################
170170
# List the CREATE TABLE statements for the DD tables.
171171
# Mask the AUTO INCREMENT counter, which is not

mysql-test/r/foreign_key_debug.result

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,8 +336,6 @@ LOCK_TYPE FROM performance_schema.metadata_locks
336336
WHERE OBJECT_NAME LIKE 'child'
337337
ORDER BY OBJECT_TYPE, OBJECT_SCHEMA, OBJECT_NAME, COLUMN_NAME, LOCK_TYPE;
338338
OBJECT_TYPE OBJECT_SCHEMA OBJECT_NAME COLUMN_NAME LOCK_TYPE
339-
COLUMN STATISTICS test child fk SHARED_READ
340-
COLUMN STATISTICS test child pk SHARED_READ
341339
TABLE test child NULL SHARED_NO_READ_WRITE
342340
TABLE test child NULL SHARED_READ_ONLY
343341
# From another connection, verify that child is locked.
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
#
2+
# The [{AUTO|MANUAL} UPDATE] histogram option.
3+
#
4+
CREATE TABLE t1 (x INT);
5+
# Histograms are set to MANUAL UPDATE by default ("auto-update": false).
6+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON x;
7+
Table Op Msg_type Msg_text
8+
test.t1 histogram status Histogram statistics created for column 'x'.
9+
SELECT histogram->'$."auto-update"' AS should_be_false FROM INFORMATION_SCHEMA.COLUMN_STATISTICS;
10+
should_be_false
11+
false
12+
# Histograms with MANUAL UPDATE have "auto-update": false.
13+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON x MANUAL UPDATE;
14+
Table Op Msg_type Msg_text
15+
test.t1 histogram status Histogram statistics created for column 'x'.
16+
SELECT histogram->'$."auto-update"' AS should_be_false FROM INFORMATION_SCHEMA.COLUMN_STATISTICS;
17+
should_be_false
18+
false
19+
# Histograms with AUTO UPDATE have "auto-update": true.
20+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON x AUTO UPDATE;
21+
Table Op Msg_type Msg_text
22+
test.t1 histogram status Histogram statistics created for column 'x'.
23+
SELECT histogram->'$."auto-update"' AS should_be_true FROM INFORMATION_SCHEMA.COLUMN_STATISTICS;
24+
should_be_true
25+
true
26+
DROP TABLE t1;
27+
#
28+
# ANALYZE TABLE and the [{AUTO|MANUAL} UPDATE] histogram option.
29+
#
30+
# ANALYZE TABLE should only update histograms with AUTO UPDATE.
31+
CREATE TABLE t1 (c1 INT, c2 INT, c3 INT, c4 INT) STATS_AUTO_RECALC=0;
32+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON c1, c3 AUTO UPDATE;
33+
Table Op Msg_type Msg_text
34+
test.t1 histogram status Histogram statistics created for column 'c1'.
35+
test.t1 histogram status Histogram statistics created for column 'c3'.
36+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON c2 MANUAL UPDATE;
37+
Table Op Msg_type Msg_text
38+
test.t1 histogram status Histogram statistics created for column 'c2'.
39+
# Should have three empty histograms.
40+
SELECT COLUMN_NAME, histogram->'$."buckets"' FROM INFORMATION_SCHEMA.COLUMN_STATISTICS ORDER BY COLUMN_NAME;
41+
COLUMN_NAME histogram->'$."buckets"'
42+
c1 []
43+
c2 []
44+
c3 []
45+
INSERT INTO t1 VALUES (1, 1, 1, 1);
46+
ANALYZE TABLE t1;
47+
Table Op Msg_type Msg_text
48+
test.t1 analyze status OK
49+
# Histograms on c1 and c3 should be non-empty, c2 should still be empty.
50+
SELECT COLUMN_NAME, histogram->'$."buckets"' FROM INFORMATION_SCHEMA.COLUMN_STATISTICS ORDER BY COLUMN_NAME;
51+
COLUMN_NAME histogram->'$."buckets"'
52+
c1 [[1, 1.0]]
53+
c2 []
54+
c3 [[1, 1.0]]
55+
DROP TABLE t1;
56+
# Histograms with AUTO UPDATE preserve their number of specified buckets when updated.
57+
CREATE TABLE t1 (x INT) STATS_AUTO_RECALC=0;
58+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON x WITH 3 BUCKETS AUTO UPDATE;
59+
Table Op Msg_type Msg_text
60+
test.t1 histogram status Histogram statistics created for column 'x'.
61+
INSERT INTO t1 VALUES (1), (2);
62+
ANALYZE TABLE t1;
63+
Table Op Msg_type Msg_text
64+
test.t1 analyze status OK
65+
SELECT histogram->'$."histogram-type"' AS should_be_singleton FROM INFORMATION_SCHEMA.COLUMN_STATISTICS;
66+
should_be_singleton
67+
"singleton"
68+
SELECT histogram->'$."number-of-buckets-specified"' AS should_be_3 FROM INFORMATION_SCHEMA.COLUMN_STATISTICS;
69+
should_be_3
70+
3
71+
# Add more values and rebuild the histogram. The histogram type should
72+
# change to equi-height, but the number of buckets should still be 3.
73+
INSERT INTO t1 VALUES (3), (4), (5);
74+
ANALYZE TABLE t1;
75+
Table Op Msg_type Msg_text
76+
test.t1 analyze status OK
77+
SELECT histogram->'$."histogram-type"' AS should_be_equiheight FROM INFORMATION_SCHEMA.COLUMN_STATISTICS;
78+
should_be_equiheight
79+
"equi-height"
80+
SELECT histogram->'$."number-of-buckets-specified"' AS should_be_3 FROM INFORMATION_SCHEMA.COLUMN_STATISTICS;
81+
should_be_3
82+
3
83+
DROP TABLE t1;
84+
# After ANALYZE TABLE the updated histograms should be used for optimization.
85+
CREATE TABLE t1 (x INT) STATS_AUTO_RECALC=0;
86+
INSERT INTO t1 VALUES (1), (2), (3), (4), (5);
87+
ANALYZE TABLE t1;
88+
Table Op Msg_type Msg_text
89+
test.t1 analyze status OK
90+
# With no histogram the default assumed selectivity when filtering a range is 0.333...
91+
EXPLAIN FORMAT=JSON INTO @v SELECT x FROM t1 WHERE x > 0;
92+
SELECT JSON_EXTRACT(@v, '$.query_block.table.filtered') AS should_be_33;
93+
should_be_33
94+
"33.33"
95+
DROP TABLE t1;
96+
CREATE TABLE t1 (x INT) STATS_AUTO_RECALC=0;
97+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON x AUTO UPDATE;
98+
Table Op Msg_type Msg_text
99+
test.t1 histogram status Histogram statistics created for column 'x'.
100+
INSERT INTO t1 VALUES (1), (2), (3), (4), (5);
101+
ANALYZE TABLE t1;
102+
Table Op Msg_type Msg_text
103+
test.t1 analyze status OK
104+
# With the updated histogram we should have a selectivity of 1.0.
105+
EXPLAIN FORMAT=JSON INTO @v SELECT x FROM t1 WHERE x > 0;
106+
SELECT JSON_EXTRACT(@v, '$.query_block.table.filtered') AS should_be_100;
107+
should_be_100
108+
"100.00"
109+
DROP TABLE t1;
110+
#
111+
# Histogram updates from the InnoDB background statistics thread.
112+
#
113+
CREATE TABLE t1 (x INT);
114+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON x AUTO UPDATE;
115+
Table Op Msg_type Msg_text
116+
test.t1 histogram status Histogram statistics created for column 'x'.
117+
INSERT INTO t1 VALUES (1), (1), (1), (1), (1);
118+
# Wait for the automatic update to kick in.
119+
DO SLEEP(0.1);
120+
# Verify that the optimizer uses the new histogram.
121+
EXPLAIN FORMAT=JSON INTO @v SELECT x FROM t1 WHERE x = 1;
122+
SELECT JSON_EXTRACT(@v, '$.query_block.table.filtered') AS should_be_100;
123+
should_be_100
124+
"100.00"
125+
# Insert values into the table and wait for the histogram to be updated in the background.
126+
INSERT INTO t1 VALUES (2), (2), (2), (2), (2);
127+
DO SLEEP(0.1);
128+
# Verify that the optimizer uses the new histogram.
129+
EXPLAIN FORMAT=JSON INTO @v SELECT x FROM t1 WHERE x = 2;
130+
SELECT JSON_EXTRACT(@v, '$.query_block.table.filtered') AS should_be_50;
131+
should_be_50
132+
"50.00"
133+
DROP TABLE t1;
134+
#
135+
# Test how failures are handled during histogram updates under ANALYZE TABLE.
136+
#
137+
CREATE TABLE t1 (x INT) STATS_AUTO_RECALC=0;
138+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON x AUTO UPDATE;
139+
Table Op Msg_type Msg_text
140+
test.t1 histogram status Histogram statistics created for column 'x'.
141+
ANALYZE TABLE t1;
142+
Table Op Msg_type Msg_text
143+
test.t1 analyze status OK
144+
SELECT histogram->'$."buckets"' AS should_be_empty FROM INFORMATION_SCHEMA.COLUMN_STATISTICS ORDER BY COLUMN_NAME;
145+
should_be_empty
146+
[]
147+
INSERT INTO t1 VALUES (1), (2), (3);
148+
SET DEBUG = '+d,update_histograms_failure';
149+
# Errors from the diagnostics area should be displayed when the histogram update fails.
150+
ANALYZE TABLE t1;
151+
Table Op Msg_type Msg_text
152+
test.t1 analyze Error Unable to build histogram statistics for column 'field' in table 'schema'.'table'
153+
test.t1 analyze status Operation failed
154+
# Verify that the histogram update is rolled back when it fails, i.e. the histogram should still be empty.
155+
SELECT histogram->'$."buckets"' AS should_be_empty FROM INFORMATION_SCHEMA.COLUMN_STATISTICS ORDER BY COLUMN_NAME;
156+
should_be_empty
157+
[]
158+
# Without the failure the histogram should be updated.
159+
SET DEBUG = '-d,update_histograms_failure';
160+
ANALYZE TABLE t1;
161+
Table Op Msg_type Msg_text
162+
test.t1 analyze status OK
163+
SELECT histogram->'$."buckets"' AS should_be_nonempty FROM INFORMATION_SCHEMA.COLUMN_STATISTICS ORDER BY COLUMN_NAME;
164+
should_be_nonempty
165+
[[1, 0.3333333333333333], [2, 0.6666666666666666], [3, 1.0]]
166+
DROP TABLE t1;
167+
#
168+
# ANALYZE TABLE t1, t2 should update histograms on both t1 and t2.
169+
#
170+
CREATE TABLE t1 (x INT) STATS_AUTO_RECALC=0;
171+
CREATE TABLE t2 (x INT) STATS_AUTO_RECALC=0;
172+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON x AUTO UPDATE;
173+
Table Op Msg_type Msg_text
174+
test.t1 histogram status Histogram statistics created for column 'x'.
175+
ANALYZE TABLE t2 UPDATE HISTOGRAM ON x AUTO UPDATE;
176+
Table Op Msg_type Msg_text
177+
test.t2 histogram status Histogram statistics created for column 'x'.
178+
INSERT INTO t1 VALUES (1), (2), (3);
179+
INSERT INTO t2 VALUES (4), (5), (6);
180+
ANALYZE TABLE t1, t2;
181+
Table Op Msg_type Msg_text
182+
test.t1 analyze status OK
183+
test.t2 analyze status OK
184+
SELECT TABLE_NAME, histogram->'$."buckets"' AS should_be_nonempty FROM INFORMATION_SCHEMA.COLUMN_STATISTICS ORDER BY COLUMN_NAME;
185+
TABLE_NAME should_be_nonempty
186+
t1 [[1, 0.3333333333333333], [2, 0.6666666666666666], [3, 1.0]]
187+
t2 [[4, 0.3333333333333333], [5, 0.6666666666666666], [6, 1.0]]
188+
DROP TABLE t1;
189+
DROP TABLE t2;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#
2+
# Errors during background histogram updates should show up in the error log.
3+
#
4+
CREATE TABLE t1 (x INT);
5+
SET SESSION DEBUG = '-d,update_histograms_failure';
6+
ANALYZE TABLE t1 UPDATE HISTOGRAM ON x AUTO UPDATE;
7+
Table Op Msg_type Msg_text
8+
test.t1 histogram status Histogram statistics created for column 'x'.
9+
include/save_error_log_position.inc
10+
INSERT INTO t1 VALUES (1), (2), (3);
11+
# Wait for an error to show up in the error log.
12+
# Verify that the error comes from the background histogram update.
13+
include/assert_error_log.inc [server: 1, pattern: Background histogram update on test.t1: Unable to build histogram statistics for column 'field' in table 'schema'.'table']
14+
DROP TABLE t1;

mysql-test/r/histogram_equi_height.result

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,6 +1605,7 @@ JSON_PRETTY(JSON_REMOVE(histogram, '$."last-updated"'))
16051605
]
16061606
],
16071607
"data-type": "int",
1608+
"auto-update": false,
16081609
"null-values": 0.0,
16091610
"collation-id": 8,
16101611
"sampling-rate": 1.0,
@@ -1687,6 +1688,7 @@ JSON_PRETTY(JSON_REMOVE(histogram, '$."last-updated"'))
16871688
]
16881689
],
16891690
"data-type": "int",
1691+
"auto-update": false,
16901692
"null-values": 0.0,
16911693
"collation-id": 8,
16921694
"sampling-rate": 1.0,
@@ -1769,6 +1771,7 @@ JSON_PRETTY(JSON_REMOVE(histogram, '$."last-updated"'))
17691771
]
17701772
],
17711773
"data-type": "int",
1774+
"auto-update": false,
17721775
"null-values": 0.0,
17731776
"collation-id": 8,
17741777
"sampling-rate": 1.0,

0 commit comments

Comments
 (0)