From 31f513165519967397e4cfa51b74583567a06898 Mon Sep 17 00:00:00 2001 From: Kamil Holubicki Date: Wed, 24 Jul 2024 16:04:06 +0200 Subject: [PATCH 1/3] PS-9238: Make MySQL 5.7 compatible with CREATE TABLE AS SELECT [...] START TRANSACTION to improve 8.0 -> 5.7 replication reliability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://perconadev.atlassian.net/browse/PS-9238 Problem: 5.7 does not understand the new syntax and complains about CREATE TABLE ... START TRANSACTION received in binlog. Cause: Commit 4fe3f2f2ce5 (8.0.21) introduced changes that allow the execution of CREATE TABLE AS SELECT as a single transaction. Before that change, CREATE TABLE was the 1st transaction, then rows were inserted, which was unsafe. To achieve this we don't commit the transaction after CREATE TABLE. The commit is done after rows are inserted. For async replica to understand this protocol, START TRANSACTION clause was added to CREATE TABLE in binlog. When the replica sees this, it knows not to commit after CREATE TABLE. Solution: Introduced ctas_compatibility_mode global, read-only variable, OFF by default. If switched to ON, it enforces the behavior from before the above-mentioned commit. Implementation details: The fix contains 2 parts: 1. Query_result_create::binlog_show_create_table() - When SE supports atomic DDL, CREATE TABLE query was binlogged with direct=false / is_trans=true flags. This caused the event to be cached in trx_cache instead of stmt_cache. MYSQL_BIN_LOG::commit() logic skips trx_cache commit, if this is not the transaction commit (it does only stmt_cache commit). Then, when rows are inserted, it was detected that there is ongoing transaction (trx_cache not empty), so no new BEGIN statement was generated in binlog. Effectively CREATE TABLE and rows insertion were done in one transaction. The fix is to revert to the default behavior, i.e., Query_result_create::binlog_show_create_table() creates the event in stmt_cache, which is committed after table creation. When rows are being inserted, it is detected that trx_cache is empty, so BEGIN statement is added to binlog. 2. store_create_info()—When executing CTAS, the START TRANSACTION clause was added to the rewritten query to inform the replica about what was happening. The fix is not to add the START TRANSACTION clause. --- mysql-test/r/mysqld--help-notwin.result | 4 ++ mysql-test/suite/sys_vars/r/all_vars.result | 2 + .../sys_vars/r/ctas_compatibility_mode.result | 11 +++++ .../sys_vars/t/ctas_compatibility_mode.test | 45 +++++++++++++++++++ sql/mysqld.cc | 2 +- sql/mysqld.h | 1 + sql/sql_insert.cc | 5 ++- sql/sql_show.cc | 3 +- sql/sys_vars.cc | 7 +++ 9 files changed, 76 insertions(+), 4 deletions(-) create mode 100644 mysql-test/suite/sys_vars/r/ctas_compatibility_mode.result create mode 100644 mysql-test/suite/sys_vars/t/ctas_compatibility_mode.test diff --git a/mysql-test/r/mysqld--help-notwin.result b/mysql-test/r/mysqld--help-notwin.result index 64cf7fd21091..b1bc99e3bac2 100644 --- a/mysql-test/r/mysqld--help-notwin.result +++ b/mysql-test/r/mysqld--help-notwin.result @@ -307,6 +307,9 @@ The following options may be given as the first argument: --create-admin-listener-thread Use a dedicated thread for listening incoming connections on admin interface + --ctas-compatibility-mode + Execute and binlog CTAS in pre 8.0.21 way, i.e. with + intermediate commit after the table creation. --cte-max-recursion-depth=# Abort a recursive common table expression if it does more than this number of iterations. @@ -1856,6 +1859,7 @@ connection-memory-limit 18446744073709551615 console FALSE coredumper (No default value) create-admin-listener-thread FALSE +ctas-compatibility-mode FALSE cte-max-recursion-depth 1000 daemonize FALSE default-authentication-plugin caching_sha2_password diff --git a/mysql-test/suite/sys_vars/r/all_vars.result b/mysql-test/suite/sys_vars/r/all_vars.result index ab906a8ce577..c079b7c2df6a 100644 --- a/mysql-test/suite/sys_vars/r/all_vars.result +++ b/mysql-test/suite/sys_vars/r/all_vars.result @@ -24,6 +24,8 @@ binlog_expire_logs_auto_purge binlog_expire_logs_auto_purge binlog_rotate_encryption_master_key_at_startup binlog_rotate_encryption_master_key_at_startup +ctas_compatibility_mode +ctas_compatibility_mode cte_max_recursion_depth cte_max_recursion_depth default_collation_for_utf8mb4 diff --git a/mysql-test/suite/sys_vars/r/ctas_compatibility_mode.result b/mysql-test/suite/sys_vars/r/ctas_compatibility_mode.result new file mode 100644 index 000000000000..e7c4dc15322a --- /dev/null +++ b/mysql-test/suite/sys_vars/r/ctas_compatibility_mode.result @@ -0,0 +1,11 @@ +include/assert.inc [ctas_compatibility_mode default should be OFF] +SET GLOBAL ctas_compatibility_mode = ON; +ERROR HY000: Variable 'ctas_compatibility_mode' is a read only variable +CREATE TABLE t1 (a int); +INSERT INTO t1 VALUES (0); +# restart: --ctas-compatibility-mode=ON --log-bin=ctas_binlog +CREATE TABLE t2 AS SELECT * FROM t1; +# restart: +include/assert_grep.inc ["Checking if ctas_compatibility_mode works"] +include/assert_grep.inc ["Checking if rows are inserted as the separate transaction"] +DROP TABLE t1, t2; diff --git a/mysql-test/suite/sys_vars/t/ctas_compatibility_mode.test b/mysql-test/suite/sys_vars/t/ctas_compatibility_mode.test new file mode 100644 index 000000000000..07de6ef442cb --- /dev/null +++ b/mysql-test/suite/sys_vars/t/ctas_compatibility_mode.test @@ -0,0 +1,45 @@ +# +# The default is OFF +# +--let $assert_text= ctas_compatibility_mode default should be OFF +--let $assert_cond= "[SHOW GLOBAL VARIABLES LIKE "ctas_compatibility_mode", Value, 1]" = "OFF" +--source include/assert.inc + +# +# Check that it is read-only +# +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET GLOBAL ctas_compatibility_mode = ON; + +# +# Check that compatibility mode works +# +CREATE TABLE t1 (a int); +INSERT INTO t1 VALUES (0); + +--let $binlog_file = ctas_binlog +--let $restart_parameters = "restart: --ctas-compatibility-mode=ON --log-bin=$binlog_file" +--source include/restart_mysqld.inc + +CREATE TABLE t2 AS SELECT * FROM t1; + +--let $restart_parameters = "restart:" +--source include/restart_mysqld.inc + +let $MYSQLD_DATADIR= `select @@datadir;`; +--exec $MYSQL_BINLOG --verbose $MYSQLD_DATADIR/$binlog_file.000001 > $MYSQLTEST_VARDIR/tmp/$binlog_file.sql +--let $assert_file = $MYSQLTEST_VARDIR/tmp/$binlog_file.sql +--let $assert_text = "Checking if ctas_compatibility_mode works" +--let $assert_select = START TRANSACTION +--let $assert_count = 0 +--source include/assert_grep.inc + +--let $assert_text = "Checking if rows are inserted as the separate transaction" +--let $assert_select = BEGIN +--let $assert_count = 1 +--source include/assert_grep.inc + +# cleanup +DROP TABLE t1, t2; +--remove_file $MYSQLD_DATADIR/$binlog_file.000001 +--remove_file $MYSQLTEST_VARDIR/tmp/$binlog_file.sql diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 7b284c514970..a19ea0897e80 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -1241,7 +1241,7 @@ bool opt_show_replica_auth_info; bool opt_log_replica_updates = false; char *opt_replica_skip_errors; bool opt_replica_allow_batching = true; - +bool opt_ctas_compatibility_mode = false; /** compatibility option: - index usage hints (USE INDEX without a FOR clause) behave as in 5.0 diff --git a/sql/mysqld.h b/sql/mysqld.h index 1dd1b54f11a9..f8452449977c 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -197,6 +197,7 @@ extern Rpl_acf_configuration_handler *rpl_acf_configuration_handler; extern Source_IO_monitor *rpl_source_io_monitor; extern int32_t opt_regexp_time_limit; extern int32_t opt_regexp_stack_limit; +extern bool opt_ctas_compatibility_mode; #ifdef _WIN32 extern bool opt_no_monitor; #endif // _WIN32 diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index b0280ff1fb59..df4bd1133a68 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -3039,8 +3039,9 @@ int Query_result_create::binlog_show_create_table(THD *thd) { bool is_trans = false; bool direct = true; - if (get_default_handlerton(thd, thd->lex->create_info->db_type)->flags & - HTON_SUPPORTS_ATOMIC_DDL) { + if ((get_default_handlerton(thd, thd->lex->create_info->db_type)->flags & + HTON_SUPPORTS_ATOMIC_DDL) && + !opt_ctas_compatibility_mode) { is_trans = true; direct = false; } diff --git a/sql/sql_show.cc b/sql/sql_show.cc index ea84451565cd..8c5604d097fc 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -2363,7 +2363,8 @@ bool store_create_info(THD *thd, Table_ref *table_list, String *packet, This is done only while binlogging CREATE TABLE AS SELECT. */ if (!thd->lex->query_block->field_list_is_empty() && - (create_info_arg->db_type->flags & HTON_SUPPORTS_ATOMIC_DDL)) { + (create_info_arg->db_type->flags & HTON_SUPPORTS_ATOMIC_DDL) && + !opt_ctas_compatibility_mode) { packet->append(STRING_WITH_LEN(" START TRANSACTION")); } diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 53735a9773f1..b9071fd1d651 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -4292,6 +4292,13 @@ static Sys_var_int32 Sys_regexp_stack_limit( GLOBAL_VAR(opt_regexp_stack_limit), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, INT32_MAX), DEFAULT(8000000), BLOCK_SIZE(1)); +static Sys_var_bool Sys_ctas_compatibility_mode( + "ctas_compatibility_mode", + "Execute and binlog CTAS in pre 8.0.21 way, i.e. with intermediate commit " + "after the table creation.", + READ_ONLY GLOBAL_VAR(opt_ctas_compatibility_mode), CMD_LINE(OPT_ARG), + DEFAULT(false)); + static Sys_var_bool Sys_replica_compressed_protocol( "replica_compressed_protocol", "Use compression in the source/replica protocol.", From 966f479b27646a9558a71ca814f474a054b7bd5c Mon Sep 17 00:00:00 2001 From: Kamil Holubicki Date: Thu, 25 Jul 2024 12:33:10 +0200 Subject: [PATCH 2/3] PS-9238: Make MySQL 5.7 compatible with CREATE TABLE AS SELECT [...] START TRANSACTION to improve 8.0 -> 5.7 replication reliability https://perconadev.atlassian.net/browse/PS-9238 Part 2 of the fix. In the scenario source -> replica1 -> replica2, if replica1 is configured with ctas_compatibility_mode it should behave accordingly and produce the binglog compatible with version 8.0.20 (intermediate commit after CREATE TABLE and no START TRANSACTION clause) This part is sensitive to create_info->m_transactional_ddl flag which is set during statement parsing. If ctas_compatibility_mode is set, we force this flag not to be set. Additionally, replica side follows the path of regular CREATE TABLE which binlogs the query as-is. if ctas_compatibility_mode is set, CREATE TABLE is removed from the query. --- .../sys_vars/r/ctas_compatibility_mode.result | 22 +++++++ .../sys_vars/t/ctas_compatibility_mode.test | 59 +++++++++++++++++++ sql/parse_tree_nodes.h | 32 ++++++++-- sql/sql_table.cc | 19 +++++- 4 files changed, 126 insertions(+), 6 deletions(-) diff --git a/mysql-test/suite/sys_vars/r/ctas_compatibility_mode.result b/mysql-test/suite/sys_vars/r/ctas_compatibility_mode.result index e7c4dc15322a..5c47c95f1e15 100644 --- a/mysql-test/suite/sys_vars/r/ctas_compatibility_mode.result +++ b/mysql-test/suite/sys_vars/r/ctas_compatibility_mode.result @@ -9,3 +9,25 @@ CREATE TABLE t2 AS SELECT * FROM t1; include/assert_grep.inc ["Checking if ctas_compatibility_mode works"] include/assert_grep.inc ["Checking if rows are inserted as the separate transaction"] DROP TABLE t1, t2; +include/rpl_init.inc [topology=1->2] +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the connection metadata repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START REPLICA; see the 'START REPLICA Syntax' in the MySQL Manual for more information. +CREATE TABLE t1 (a int); +INSERT INTO t1 VALUES (0); +include/rpl_sync.inc +[connection server_2] +include/rpl_restart_server.inc [server_number=2 parameters: --ctas-compatibility-mode=ON --log-bin=ctas_binlog] +include/rpl_start_slaves.inc +[connection server_1] +CREATE TABLE t2 AS SELECT * FROM t1; +include/rpl_sync.inc +[connection server_2] +include/rpl_restart_server.inc [server_number=2] +include/rpl_start_slaves.inc +[connection server_2] +include/assert_grep.inc ["Checking if ctas_compatibility_mode works on replica"] +include/assert_grep.inc ["Checking if rows are inserted as the separate transaction on replica"] +[connection server_1] +DROP TABLE t1, t2; +include/rpl_end.inc diff --git a/mysql-test/suite/sys_vars/t/ctas_compatibility_mode.test b/mysql-test/suite/sys_vars/t/ctas_compatibility_mode.test index 07de6ef442cb..2637502bf0f2 100644 --- a/mysql-test/suite/sys_vars/t/ctas_compatibility_mode.test +++ b/mysql-test/suite/sys_vars/t/ctas_compatibility_mode.test @@ -43,3 +43,62 @@ let $MYSQLD_DATADIR= `select @@datadir;`; DROP TABLE t1, t2; --remove_file $MYSQLD_DATADIR/$binlog_file.000001 --remove_file $MYSQLTEST_VARDIR/tmp/$binlog_file.sql + + +# +# Check the behavior of replica started with ctas_compatibility_mode enabled +# +--let $rpl_topology = 1->2 +--source include/rpl_init.inc +CREATE TABLE t1 (a int); +INSERT INTO t1 VALUES (0); +--source include/rpl_sync.inc + +# Now restart replica with ctas-compatibility-mode=ON and a custom binlog file +--let $rpl_connection_name = server_2 +--source include/rpl_connection.inc +--let $binlog_file = ctas_binlog +--let $rpl_server_parameters = --ctas-compatibility-mode=ON --log-bin=$binlog_file +--let $rpl_server_number = 2 +--source include/rpl_restart_server.inc +--source include/rpl_start_slaves.inc + +# Execute CTAS on source server +--let $rpl_connection_name = server_1 +--source include/rpl_connection.inc +CREATE TABLE t2 AS SELECT * FROM t1; +--source include/rpl_sync.inc + +# Restart replica with the default parameters +--let $rpl_connection_name = server_2 +--source include/rpl_connection.inc +--let $rpl_server_parameters = +--let $rpl_server_number = 2 +--source include/rpl_restart_server.inc +--source include/rpl_start_slaves.inc + +# We expect that replica started with --ctas-compatibility-mode=ON behaved accordingly +--let $rpl_connection_name = server_2 +--source include/rpl_connection.inc +let $MYSQLD_DATADIR= `select @@datadir;`; +--exec $MYSQL_BINLOG --verbose $MYSQLD_DATADIR/$binlog_file.000001 > $MYSQLTEST_VARDIR/tmp/$binlog_file.sql +--let $assert_file = $MYSQLTEST_VARDIR/tmp/$binlog_file.sql +--let $assert_text = "Checking if ctas_compatibility_mode works on replica" +--let $assert_select = START TRANSACTION +--let $assert_count = 0 +--source include/assert_grep.inc + +--let $assert_text = "Checking if rows are inserted as the separate transaction on replica" +--let $assert_select = BEGIN +--let $assert_count = 1 +--source include/assert_grep.inc + +# cleanup +--remove_file $MYSQLD_DATADIR/$binlog_file.000001 +--remove_file $MYSQLTEST_VARDIR/tmp/$binlog_file.sql + +--let $rpl_connection_name = server_1 +--source include/rpl_connection.inc +DROP TABLE t1, t2; + +--source include/rpl_end.inc diff --git a/sql/parse_tree_nodes.h b/sql/parse_tree_nodes.h index a4123343727f..2d9f994c9ad4 100644 --- a/sql/parse_tree_nodes.h +++ b/sql/parse_tree_nodes.h @@ -46,6 +46,7 @@ #include "sql/handler.h" #include "sql/key_spec.h" #include "sql/mem_root_array.h" +#include "sql/mysqld.h" // opt_ctas_compatibility_mode #include "sql/opt_explain.h" // Sql_cmd_explain_other_thread #include "sql/parse_location.h" #include "sql/parse_tree_helpers.h" // PT_item_list @@ -2484,10 +2485,33 @@ typedef PT_traceable_create_table_option< TYPE_AND_REF(HA_CREATE_INFO::key_block_size), HA_CREATE_USED_KEY_BLOCK_SIZE> PT_create_key_block_size_option; -typedef PT_traceable_create_table_option< - TYPE_AND_REF(HA_CREATE_INFO::m_transactional_ddl), - HA_CREATE_USED_START_TRANSACTION> - PT_create_start_transaction_option; +class PT_create_start_transaction_option + : public PT_traceable_create_table_option< + TYPE_AND_REF(HA_CREATE_INFO::m_transactional_ddl), + HA_CREATE_USED_START_TRANSACTION> { + typedef PT_create_table_option super; + + decltype(HA_CREATE_INFO::m_transactional_ddl) value; + + public: + explicit PT_create_start_transaction_option( + decltype(HA_CREATE_INFO::m_transactional_ddl) value) + : PT_traceable_create_table_option< + TYPE_AND_REF(HA_CREATE_INFO::m_transactional_ddl), + HA_CREATE_USED_START_TRANSACTION>(value), + value(value) {} + + bool contextualize(Table_ddl_parse_context *pc) override { + if (super::contextualize(pc)) return true; + if (opt_ctas_compatibility_mode) { + pc->create_info->m_transactional_ddl = false; + } else { + pc->create_info->m_transactional_ddl = value; + pc->create_info->used_fields |= HA_CREATE_USED_START_TRANSACTION; + } + return false; + } +}; typedef PT_traceable_create_table_option< TYPE_AND_REF(HA_CREATE_INFO::m_implicit_tablespace_autoextend_size), diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 4119753ca65c..6af0b346fa69 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -10396,8 +10396,23 @@ bool mysql_create_table(THD *thd, Table_ref *create_table, } } } else { - result = write_bin_log(thd, true, thd->query().str, thd->query().length, - is_trans); + /* + We can get here from replica thread executing + CREATE TABLE ... START TRANSACTION. If ctas_compatibility_mode==true + we follow the right path because of create_info->m_transactional_ddl + being set properly to false in + PT_create_start_transaction_option::contextualize(), but we need to + remove START TRANSACTION clause from the query before binlogging it. + */ + size_t query_length = thd->query().length; + if (opt_ctas_compatibility_mode) { + const char *pos = strstr(thd->query().str, "START TRANSACTION"); + if (pos != nullptr) { + query_length = pos - thd->query().str; + } + } + result = + write_bin_log(thd, true, thd->query().str, query_length, is_trans); } } } From 78b64a42eeea66739209063d2c87bf70f09e5e09 Mon Sep 17 00:00:00 2001 From: Kamil Holubicki Date: Thu, 25 Jul 2024 22:12:47 +0200 Subject: [PATCH 3/3] PS-9238: Make MySQL 5.7 compatible with CREATE TABLE AS SELECT [...] START TRANSACTION to improve 8.0 -> 5.7 replication reliability https://perconadev.atlassian.net/browse/PS-9238 Part 2.1 of the fix. When ctas_compatibility_mode=OFF the binlog sequence is: BEGIN CREATE TABLE ... START TRANSACTION ROW EVENT COMMIT MTS sees the above as one group. When the source produces binlog with CREATE TABLE ... START TRANSACTION but the intermediate replica is configured with ctas_compatibility_mode=ON it has to execute and binlog CTAS in legacy mode (pre 8.0.20). It means the intermediate commit will be done after CREATE TABLE. This commit will call pre_commit hook, which will call Slave_worker::commit_positions() where we check the validity of ptr_group->checkpoint_seqno. checkpoint_seqno however is set when group end is detected (COMMIT event), but it is too late. We detect CTAS case and set checkpoint_seqno to be prepared for intermediate commit. --- sql/log_event.cc | 9 +++++++++ sql/log_event.h | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/sql/log_event.cc b/sql/log_event.cc index 145bab46f23e..0916f11349f4 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -3035,6 +3035,15 @@ Slave_worker *Log_event::get_slave_worker(Relay_log_info *rli) { #ifndef NDEBUG w_rr++; #endif + } else if (opt_ctas_compatibility_mode && is_ctas()) { + /* + In ctas compatibility there will be intermediate commit + after CREATE TABLE. It will call pre_commit hook, which will call + Slave_worker::commit_positions() where we check the validity of + ptr_group->checkpoint_seqno. + */ + ptr_group->checkpoint_seqno = rli->rli_checkpoint_seqno; + rli->rli_checkpoint_seqno++; } return ret_worker; diff --git a/sql/log_event.h b/sql/log_event.h index 1d5a2116ae96..cbd8186dd43a 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1099,6 +1099,7 @@ class Log_event { false otherwise */ virtual bool ends_group() const { return false; } + virtual bool is_ctas() const { return false; } #ifdef MYSQL_SERVER /** Apply the event to the database. @@ -1471,6 +1472,12 @@ class Query_log_event : public virtual binary_log::Query_event, native_strncasecmp(query, STRING_WITH_LEN("ROLLBACK TO "))) || !strncmp(query, STRING_WITH_LEN("XA ROLLBACK")); } + + bool is_ctas() const override { + return (strstr(query, "CREATE TABLE") != nullptr) && + (strstr(query, "START TRANSACTION") != nullptr); + } + static size_t get_query(const char *buf, size_t length, const Format_description_event *fd_event, const char **query_arg);