From bc9d38a337ae023f5093a36b32ca2a2d1deac593 Mon Sep 17 00:00:00 2001 From: Oleksandr Kachan Date: Wed, 27 Mar 2024 16:50:59 +0200 Subject: [PATCH 1/8] PS-9148: Add caching of dictionary table for component_masking_functions https://perconadev.atlassian.net/browse/PS-9148 - Added caching of mysql.masking_dictionaries table content. - Implemented masking_dictionaries_flush() UDF which flushes data from the masking dictionaries table to the memory cache. --- components/masking_functions/CMakeLists.txt | 4 + .../dictionary_container.hpp | 56 +++++++ .../masking_functions/query_builder.hpp | 29 +--- .../include/masking_functions/query_cache.hpp | 45 ++++++ .../include/masking_functions/sql_context.hpp | 8 +- .../dictionary_container.cpp | 91 +++++++++++ .../src/masking_functions/query_builder.cpp | 32 ++-- .../src/masking_functions/query_cache.cpp | 104 +++++++++++++ .../registration_routines.cpp | 144 ++++++++++-------- .../src/masking_functions/sql_context.cpp | 24 +-- .../r/dictionary_operations.result | 46 ++++-- .../t/dictionary_operations.test | 66 +++++--- 12 files changed, 494 insertions(+), 155 deletions(-) create mode 100644 components/masking_functions/include/masking_functions/dictionary_container.hpp create mode 100644 components/masking_functions/include/masking_functions/query_cache.hpp create mode 100644 components/masking_functions/src/masking_functions/dictionary_container.cpp create mode 100644 components/masking_functions/src/masking_functions/query_cache.cpp diff --git a/components/masking_functions/CMakeLists.txt b/components/masking_functions/CMakeLists.txt index 97d1ca56835b..891c708274a9 100644 --- a/components/masking_functions/CMakeLists.txt +++ b/components/masking_functions/CMakeLists.txt @@ -28,7 +28,9 @@ set(DATAMASKING_SOURCES src/masking_functions/charset_string.cpp src/masking_functions/charset_string_operations.cpp + src/masking_functions/dictionary_container.cpp src/masking_functions/query_builder.cpp + 
src/masking_functions/query_cache.cpp src/masking_functions/random_string_generators.cpp src/masking_functions/registration_routines.cpp src/masking_functions/sql_context.cpp @@ -39,8 +41,10 @@ set(DATAMASKING_SOURCES include/masking_functions/charset_string_operations.hpp include/masking_functions/command_service_tuple_fwd.hpp include/masking_functions/command_service_tuple.hpp + include/masking_functions/dictionary_container.hpp include/masking_functions/primitive_singleton.hpp include/masking_functions/query_builder.hpp + include/masking_functions/query_cache.hpp include/masking_functions/random_string_generators.hpp include/masking_functions/registration_routines.hpp include/masking_functions/sql_context.hpp diff --git a/components/masking_functions/include/masking_functions/dictionary_container.hpp b/components/masking_functions/include/masking_functions/dictionary_container.hpp new file mode 100644 index 000000000000..67db3673711c --- /dev/null +++ b/components/masking_functions/include/masking_functions/dictionary_container.hpp @@ -0,0 +1,56 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MASKING_FUNCTIONS_DICT_CONTAINER_HPP +#define MASKING_FUNCTIONS_DICT_CONTAINER_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace masking_functions { + +using optional_string = std::optional; + +class dictionary_container { + struct term_container { + explicit term_container(std::string term) : term_list{std::move(term)} {} + mutable std::shared_mutex term_mutex; + std::set term_list; + }; + + public: + bool contains(const std::string &dictionary_name, + const std::string &term) const noexcept; + optional_string get(const std::string &dictionary_name) const noexcept; + bool remove(const std::string &dictionary_name) noexcept; + bool remove(const std::string &dictionary_name, + const std::string &term) noexcept; + bool insert(const std::string &dictionary_name, + const std::string &term) noexcept; + + private: + std::map m_container; +}; + +using optional_dictionary_container = std::optional; + +} // namespace masking_functions + +#endif // MASKING_FUNCTIONS_DICT_CONTAINER_HPP diff --git a/components/masking_functions/include/masking_functions/query_builder.hpp b/components/masking_functions/include/masking_functions/query_builder.hpp index 3ecfb84042b6..3aa52c08e22f 100644 --- a/components/masking_functions/include/masking_functions/query_builder.hpp +++ b/components/masking_functions/include/masking_functions/query_builder.hpp @@ -19,8 +19,6 @@ #include #include -#include "masking_functions/charset_string_fwd.hpp" - namespace masking_functions { // A helper class which allows to easily construct SQL-statements necessary @@ -56,26 +54,18 @@ class query_builder { return term_field_name_; } - std::string select_random_term_for_dictionary( - const charset_string &dictionary_name) const { - return 
select_term_for_dictionary_internal(dictionary_name, nullptr); - } - std::string check_term_presence_in_dictionary( - const charset_string &dictionary_name, const charset_string &term) const { - return select_term_for_dictionary_internal(dictionary_name, &term); - } + std::string select_all_from_dictionary() const; - std::string insert_ignore_record(const charset_string &dictionary_name, - const charset_string &term) const; + std::string insert_ignore_record(const std::string &dictionary_name, + const std::string &term) const; - std::string delete_for_dictionary( - const charset_string &dictionary_name) const { + std::string delete_for_dictionary(const std::string &dictionary_name) const { return delete_for_dictionary_and_opt_term_internal(dictionary_name, nullptr); } - std::string delete_for_dictionary_and_term( - const charset_string &dictionary_name, const charset_string &term) const { + std::string delete_for_dictionary_and_term(const std::string &dictionary_name, + const std::string &term) const { return delete_for_dictionary_and_opt_term_internal(dictionary_name, &term); } @@ -85,13 +75,8 @@ class query_builder { std::string dictionary_field_name_; std::string term_field_name_; - std::string select_term_for_dictionary_internal( - const charset_string &dictionary_name, - const charset_string *opt_term) const; - std::string delete_for_dictionary_and_opt_term_internal( - const charset_string &dictionary_name, - const charset_string *opt_term) const; + const std::string &dictionary_name, const std::string *opt_term) const; }; } // namespace masking_functions diff --git a/components/masking_functions/include/masking_functions/query_cache.hpp b/components/masking_functions/include/masking_functions/query_cache.hpp new file mode 100644 index 000000000000..592d812f6de1 --- /dev/null +++ b/components/masking_functions/include/masking_functions/query_cache.hpp @@ -0,0 +1,45 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MASKING_FUNCTIONS_QUERY_CACHE_HPP +#define MASKING_FUNCTIONS_QUERY_CACHE_HPP + +#include +#include + +#include "masking_functions/dictionary_container.hpp" + +namespace masking_functions { + +class query_cache { + public: + query_cache(); + + bool contains(const std::string &dictionary_name, + const std::string &term) const; + optional_string get(const std::string &dictionary_name) const; + bool remove(const std::string &dictionary_name); + bool remove(const std::string &dictionary_name, const std::string &term); + bool insert(const std::string &dictionary_name, const std::string &term); + bool load_cache(); + + private: + mutable std::shared_mutex m_dict_mut; + dictionary_container m_dict_cache; +}; + +} // namespace masking_functions + +#endif // MASKING_FUNCTIONS_QUERY_CACHE_HPP diff --git a/components/masking_functions/include/masking_functions/sql_context.hpp b/components/masking_functions/include/masking_functions/sql_context.hpp index 3a7aee933d30..d88b6de1bc16 100644 --- a/components/masking_functions/include/masking_functions/sql_context.hpp +++ b/components/masking_functions/include/masking_functions/sql_context.hpp @@ -16,11 +16,13 @@ #ifndef MASKING_FUNCTIONS_SQL_CONTEXT_HPP #define MASKING_FUNCTIONS_SQL_CONTEXT_HPP +#include #include #include #include #include 
"masking_functions/command_service_tuple_fwd.hpp" +#include "masking_functions/dictionary_container.hpp" namespace masking_functions { @@ -30,8 +32,6 @@ namespace masking_functions { // construction. class sql_context { public: - using optional_string = std::optional; - explicit sql_context(const command_service_tuple &services); sql_context(sql_context const &) = delete; @@ -46,9 +46,7 @@ class sql_context { return *impl_.get_deleter().services; } - // Executes a query where we either expect a single result (one row one - // column), or nothing - optional_string query_single_value(std::string_view query); + optional_dictionary_container query_list(std::string_view query); bool execute(std::string_view query); diff --git a/components/masking_functions/src/masking_functions/dictionary_container.cpp b/components/masking_functions/src/masking_functions/dictionary_container.cpp new file mode 100644 index 000000000000..dd0f4e33fc22 --- /dev/null +++ b/components/masking_functions/src/masking_functions/dictionary_container.cpp @@ -0,0 +1,91 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#include "masking_functions/dictionary_container.hpp" + +#include "masking_functions/random_string_generators.hpp" + +#include + +namespace masking_functions { + +bool dictionary_container::contains(const std::string &dictionary_name, + const std::string &term) const noexcept { + const auto it = m_container.find(dictionary_name); + + if (it == m_container.cend()) { + return false; + } + + if (term.length() == 0) { + return true; + } + + std::shared_lock term_read_lock{it->second.term_mutex}; + return it->second.term_list.count(term) > 0; +} + +optional_string dictionary_container::get( + const std::string &dictionary_name) const noexcept { + const auto dict_it = m_container.find(dictionary_name); + + if (dict_it == m_container.cend()) { + return std::nullopt; + } + + std::shared_lock term_read_lock{dict_it->second.term_mutex}; + + if (dict_it->second.term_list.empty()) { + return std::nullopt; + } + + auto random_step = random_number(0, dict_it->second.term_list.size() - 1); + auto term_it = dict_it->second.term_list.begin(); + std::advance(term_it, random_step); + + return optional_string{std::in_place, *term_it}; +} + +bool dictionary_container::remove(const std::string &dictionary_name) noexcept { + return m_container.erase(dictionary_name) > 0; +} + +bool dictionary_container::remove(const std::string &dictionary_name, + const std::string &term) noexcept { + const auto dict_it = m_container.find(dictionary_name); + + if (dict_it == m_container.cend()) { + return false; + } + + std::unique_lock term_write_lock{dict_it->second.term_mutex}; + return dict_it->second.term_list.erase(term) > 0; +} + +bool dictionary_container::insert(const std::string &dictionary_name, + const std::string &term) noexcept { + auto it = m_container.find(dictionary_name); + + if 
(it != m_container.end()) { + std::unique_lock term_write_lock{it->second.term_mutex}; + it->second.term_list.emplace(term); + } else { + m_container.emplace(dictionary_name, term); + } + + return true; +} + +} // namespace masking_functions diff --git a/components/masking_functions/src/masking_functions/query_builder.cpp b/components/masking_functions/src/masking_functions/query_builder.cpp index 809606d45622..802d47f18cba 100644 --- a/components/masking_functions/src/masking_functions/query_builder.cpp +++ b/components/masking_functions/src/masking_functions/query_builder.cpp @@ -17,52 +17,40 @@ #include "masking_functions/query_builder.hpp" -#include "masking_functions/sql_escape_functions.hpp" - namespace masking_functions { std::string query_builder::insert_ignore_record( - const charset_string &dictionary_name, const charset_string &term) const { + const std::string &dictionary_name, const std::string &term) const { std::ostringstream oss; oss << "INSERT IGNORE INTO " << get_database_name() << '.' << get_table_name() << " (" << get_dictionary_field_name() << ", " << get_term_field_name() - << ')' << " VALUES('" << escape_string(dictionary_name) << "', '" - << escape_string(term) << "')"; + << ')' << " VALUES('" << dictionary_name << "', '" << term << "')"; return oss.str(); } -std::string query_builder::select_term_for_dictionary_internal( - const charset_string &dictionary_name, - const charset_string *opt_term) const { +std::string query_builder::select_all_from_dictionary() const { std::ostringstream oss; // In our implementation there is no requirement that the `Term` field in // the `mysql.masking_dictionaries` table must be in `utf8mb4`. So, by // adding CONVERT(Term USING utf8mb4) we support other character sets in // the underlying table as well. 
oss << "SELECT " + << "CONVERT(" << get_dictionary_field_name() << " USING " + << default_result_character_set << "), " << "CONVERT(" << get_term_field_name() << " USING " << default_result_character_set << ") FROM " << get_database_name() << '.' - << get_table_name() << " WHERE " << get_dictionary_field_name() << " = '" - << escape_string(dictionary_name) << '\''; - if (opt_term != nullptr) { - oss << " AND " << get_term_field_name() << " = '" - << escape_string(*opt_term) << '\''; - } else { - oss << " ORDER BY RAND() LIMIT 1"; - } + << get_table_name(); return oss.str(); } std::string query_builder::delete_for_dictionary_and_opt_term_internal( - const charset_string &dictionary_name, - const charset_string *opt_term) const { + const std::string &dictionary_name, const std::string *opt_term) const { std::ostringstream oss; oss << "DELETE FROM " << get_database_name() << '.' << get_table_name() - << " WHERE " << get_dictionary_field_name() << " = '" - << escape_string(dictionary_name) << '\''; + << " WHERE " << get_dictionary_field_name() << " = '" << dictionary_name + << '\''; if (opt_term != nullptr) { - oss << " AND " << get_term_field_name() << " = '" - << escape_string(*opt_term) << '\''; + oss << " AND " << get_term_field_name() << " = '" << *opt_term << '\''; } return oss.str(); } diff --git a/components/masking_functions/src/masking_functions/query_cache.cpp b/components/masking_functions/src/masking_functions/query_cache.cpp new file mode 100644 index 000000000000..7107c728b265 --- /dev/null +++ b/components/masking_functions/src/masking_functions/query_cache.cpp @@ -0,0 +1,104 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#include "masking_functions/query_cache.hpp" + +#include "masking_functions/command_service_tuple.hpp" +#include "masking_functions/primitive_singleton.hpp" +#include "masking_functions/query_builder.hpp" +#include "masking_functions/sql_context.hpp" + +namespace masking_functions { +namespace { + +using global_command_services = masking_functions::primitive_singleton< + masking_functions::command_service_tuple>; +using global_query_builder = + masking_functions::primitive_singleton; + +} // namespace + +query_cache::query_cache() { load_cache(); } + +bool query_cache::load_cache() { + auto query = global_query_builder::instance().select_all_from_dictionary(); + auto result = + masking_functions::sql_context{global_command_services::instance()} + .query_list(query); + + if (result.has_value()) { + std::unique_lock dict_write_lock{m_dict_mut}; + m_dict_cache = std::move(result.value()); + } + + return result.has_value(); +} + +bool query_cache::contains(const std::string &dictionary_name, + const std::string &term) const { + std::shared_lock dict_read_lock{m_dict_mut}; + return m_dict_cache.contains(dictionary_name, term); +} + +optional_string query_cache::get(const std::string &dictionary_name) const { + std::shared_lock dict_read_lock{m_dict_mut}; + return m_dict_cache.get(dictionary_name); +} + +bool query_cache::remove(const std::string &dictionary_name) { + std::unique_lock dict_write_lock{m_dict_mut}; + + masking_functions::sql_context sql_ctx{global_command_services::instance()}; + auto query = + 
global_query_builder::instance().delete_for_dictionary(dictionary_name); + + if (!sql_ctx.execute(query)) { + return false; + } + + return m_dict_cache.remove(dictionary_name); +} + +bool query_cache::remove(const std::string &dictionary_name, + const std::string &term) { + std::shared_lock dict_read_lock{m_dict_mut}; + + masking_functions::sql_context sql_ctx{global_command_services::instance()}; + auto query = global_query_builder::instance().delete_for_dictionary_and_term( + dictionary_name, term); + + if (!sql_ctx.execute(query)) { + return false; + } + + return m_dict_cache.remove(dictionary_name, term); +} + +bool query_cache::insert(const std::string &dictionary_name, + const std::string &term) { + std::unique_lock dict_write_lock{m_dict_mut}; + + masking_functions::sql_context sql_ctx{global_command_services::instance()}; + auto query = global_query_builder::instance().insert_ignore_record( + dictionary_name, term); + + if (!sql_ctx.execute(query)) { + return false; + } + + return m_dict_cache.insert(dictionary_name, term); +} + +} // namespace masking_functions diff --git a/components/masking_functions/src/masking_functions/registration_routines.cpp b/components/masking_functions/src/masking_functions/registration_routines.cpp index 6debeee35c4a..af4a071f786b 100644 --- a/components/masking_functions/src/masking_functions/registration_routines.cpp +++ b/components/masking_functions/src/masking_functions/registration_routines.cpp @@ -37,9 +37,9 @@ #include "masking_functions/charset_string_operations.hpp" #include "masking_functions/command_service_tuple.hpp" #include "masking_functions/primitive_singleton.hpp" -#include "masking_functions/query_builder.hpp" +#include "masking_functions/query_cache.hpp" #include "masking_functions/random_string_generators.hpp" -#include "masking_functions/sql_context.hpp" +#include "masking_functions/sql_escape_functions.hpp" #include "masking_functions/string_service_tuple.hpp" extern 
REQUIRES_SERVICE_PLACEHOLDER(udf_registration); @@ -55,10 +55,8 @@ namespace { using global_string_services = masking_functions::primitive_singleton< masking_functions::string_service_tuple>; -using global_command_services = masking_functions::primitive_singleton< - masking_functions::command_service_tuple>; -using global_query_builder = - masking_functions::primitive_singleton; +using global_query_cache = + masking_functions::primitive_singleton; constexpr std::string_view masking_dictionaries_privilege_name = "MASKING_DICTIONARIES_ADMIN"; @@ -960,31 +958,24 @@ class gen_blocklist_impl { if (ctx.is_arg_null(0)) return std::nullopt; const auto cs_term = make_charset_string_from_arg(ctx, 0); - const auto cs_dict_a = make_charset_string_from_arg(ctx, 1); - const auto cs_dict_b = make_charset_string_from_arg(ctx, 2); + const auto cs_term_escaped = escape_string(cs_term); + const auto cs_dict_a_escaped = + escape_string(make_charset_string_from_arg(ctx, 1)); + const auto cs_dict_b_escaped = + escape_string(make_charset_string_from_arg(ctx, 2)); { - masking_functions::sql_context sql_ctx{ - global_command_services::instance()}; - - auto query = - global_query_builder::instance().check_term_presence_in_dictionary( - cs_dict_a, cs_term); - auto sresult = sql_ctx.query_single_value(query); + auto sresult = global_query_cache::instance().contains(cs_dict_a_escaped, + cs_term_escaped); if (!sresult) { - return {std::string{cs_term.get_buffer()}}; + return {std::string{cs_term.get_buffer()}}; } } - masking_functions::sql_context sql_ctx{global_command_services::instance()}; - - auto query = - global_query_builder::instance().select_random_term_for_dictionary( - cs_dict_b); - auto sresult = sql_ctx.query_single_value(query); + auto sresult = global_query_cache::instance().get(cs_dict_b_escaped); - if (sresult && sresult->size() > 0) { + if (sresult && !sresult->empty()) { masking_functions::charset_string utf8_result{ global_string_services::instance(), *sresult, 
masking_functions::charset_string::utf8mb4_collation_name}; @@ -992,9 +983,9 @@ class gen_blocklist_impl { const auto &cs_result = masking_functions::smart_convert_to_collation( utf8_result, cs_term.get_collation(), conversion_buffer); return {std::string{cs_result.get_buffer()}}; - } else { - return std::nullopt; } + + return std::nullopt; } }; @@ -1025,20 +1016,54 @@ class gen_dictionary_impl { mysqlpp::udf_result_t calculate( const mysqlpp::udf_context &ctx) { - const auto cs_dictionary = make_charset_string_from_arg(ctx, 0); + const auto cs_dictionary_escaped = + escape_string(make_charset_string_from_arg(ctx, 0)); + auto sresult = global_query_cache::instance().get(cs_dictionary_escaped); - masking_functions::sql_context sql_ctx{global_command_services::instance()}; + if (sresult && !sresult->empty()) { + return *sresult; + } - auto query = - global_query_builder::instance().select_random_term_for_dictionary( - cs_dictionary); - auto sresult = sql_ctx.query_single_value(query); + return std::nullopt; + } +}; - if (sresult && sresult->size() > 0) { - return *sresult; - } else { +// +// masking_dictionaries_flush() +// +// Flush the data from the masking dictionaries table to the memory cache. +class masking_dictionaries_flush_impl { + public: + explicit masking_dictionaries_flush_impl(mysqlpp::udf_context &ctx) { + if (!have_masking_admin_privilege()) { + throw std::invalid_argument{ + "Function requires " + + std::string(masking_dictionaries_privilege_name) + " privilege"}; + } + + if (ctx.get_number_of_args() > 0) + throw std::invalid_argument{ + "Wrong argument list: masking_dictionaries_flush()"}; + + ctx.mark_result_nullable(true); + // Calling this UDF two or more times has exactly the same effect as just + // calling it once. So, we mark the result as 'const' here so that the + // optimizer could use this info to eliminate unnecessary calls. 
+ ctx.mark_result_const(true); + + mysqlpp::udf_context_charset_extension charset_ext{ + mysql_service_mysql_udf_metadata}; + charset_ext.set_return_value_collation( + ctx, masking_functions::charset_string::default_collation_name); + } + + mysqlpp::udf_result_t calculate(const mysqlpp::udf_context &ctx + [[maybe_unused]]) { + if (!global_query_cache::instance().load_cache()) { return std::nullopt; } + + return "1"; } }; @@ -1078,17 +1103,14 @@ class masking_dictionary_remove_impl { mysqlpp::udf_result_t calculate( const mysqlpp::udf_context &ctx) { - const auto cs_dictionary = make_charset_string_from_arg(ctx, 0); - - masking_functions::sql_context sql_ctx{global_command_services::instance()}; + const auto cs_dictionary_escaped = + escape_string(make_charset_string_from_arg(ctx, 0)); - auto query = - global_query_builder::instance().delete_for_dictionary(cs_dictionary); - if (!sql_ctx.execute(query)) { + if (!global_query_cache::instance().remove(cs_dictionary_escaped)) { return std::nullopt; - } else { - return "1"; } + + return "1"; } }; @@ -1133,19 +1155,17 @@ class masking_dictionary_term_add_impl { mysqlpp::udf_result_t calculate( const mysqlpp::udf_context &ctx) { - const auto cs_dictionary = make_charset_string_from_arg(ctx, 0); - const auto cs_term = make_charset_string_from_arg(ctx, 1); - - masking_functions::sql_context sql_ctx{global_command_services::instance()}; - - auto query = global_query_builder::instance().insert_ignore_record( - cs_dictionary, cs_term); + const auto cs_dictionary_escaped = + escape_string(make_charset_string_from_arg(ctx, 0)); + const auto cs_term_escaped = + escape_string(make_charset_string_from_arg(ctx, 1)); - if (!sql_ctx.execute(query)) { + if (!global_query_cache::instance().insert(cs_dictionary_escaped, + cs_term_escaped)) { return std::nullopt; - } else { - return "1"; } + + return "1"; } }; @@ -1190,19 +1210,17 @@ class masking_dictionary_term_remove_impl { mysqlpp::udf_result_t calculate( const mysqlpp::udf_context 
&ctx) { - const auto cs_dictionary = make_charset_string_from_arg(ctx, 0); - const auto cs_term = make_charset_string_from_arg(ctx, 1); - - masking_functions::sql_context sql_ctx{global_command_services::instance()}; + const auto cs_dictionary_escaped = + escape_string(make_charset_string_from_arg(ctx, 0)); + const auto cs_term_escaped = + escape_string(make_charset_string_from_arg(ctx, 1)); - auto query = - global_query_builder::instance().delete_for_dictionary_and_term( - cs_dictionary, cs_term); - if (!sql_ctx.execute(query)) { + if (!global_query_cache::instance().remove(cs_dictionary_escaped, + cs_term_escaped)) { return std::nullopt; - } else { - return "1"; } + + return "1"; } }; @@ -1229,6 +1247,7 @@ DECLARE_STRING_UDF_AUTO(mask_uk_nin) DECLARE_STRING_UDF_AUTO(mask_uuid) DECLARE_STRING_UDF_AUTO(gen_blocklist) DECLARE_STRING_UDF_AUTO(gen_dictionary) +DECLARE_STRING_UDF_AUTO(masking_dictionaries_flush) DECLARE_STRING_UDF_AUTO(masking_dictionary_remove) DECLARE_STRING_UDF_AUTO(masking_dictionary_term_add) DECLARE_STRING_UDF_AUTO(masking_dictionary_term_remove) @@ -1256,6 +1275,7 @@ std::array known_udfs{DECLARE_UDF_INFO_AUTO(gen_range), DECLARE_UDF_INFO_AUTO(mask_uuid), DECLARE_UDF_INFO_AUTO(gen_blocklist), DECLARE_UDF_INFO_AUTO(gen_dictionary), + DECLARE_UDF_INFO_AUTO(masking_dictionaries_flush), DECLARE_UDF_INFO_AUTO(masking_dictionary_remove), DECLARE_UDF_INFO_AUTO(masking_dictionary_term_add), DECLARE_UDF_INFO_AUTO(masking_dictionary_term_remove)}; diff --git a/components/masking_functions/src/masking_functions/sql_context.cpp b/components/masking_functions/src/masking_functions/sql_context.cpp index b351c0568f0c..8266afb91956 100644 --- a/components/masking_functions/src/masking_functions/sql_context.cpp +++ b/components/masking_functions/src/masking_functions/sql_context.cpp @@ -82,8 +82,7 @@ sql_context::sql_context(const command_service_tuple &services) } } -sql_context::optional_string sql_context::query_single_value( - std::string_view query) { 
+optional_dictionary_container sql_context::query_list(std::string_view query) { if ((*get_services().query->query)(to_mysql_h(impl_.get()), query.data(), query.length()) != 0) { throw std::runtime_error{"Error while executing SQL query"}; @@ -116,19 +115,22 @@ sql_context::optional_string sql_context::query_single_value( &row_count) != 0) throw std::runtime_error{"Couldn't query row count"}; - if (row_count == 0) return std::nullopt; + optional_dictionary_container result{std::in_place, dictionary_container{}}; - if (row_count > 1) throw std::runtime_error{"Query returned more than 1 row"}; + for (auto i = row_count; i > 0; --i) { + MYSQL_ROW_H row = nullptr; + ulong *length = nullptr; - MYSQL_ROW_H row = nullptr; - if ((*get_services().query_result->fetch_row)(mysql_res, &row) != 0) - throw std::runtime_error{"Couldn't fetch row"}; + if ((*get_services().query_result->fetch_row)(mysql_res, &row) != 0) + throw std::runtime_error{"Couldn't fetch row"}; + if ((*get_services().query_result->fetch_lengths)(mysql_res, &length) != 0) + throw std::runtime_error{"Couldn't fetch length"}; - ulong *length = nullptr; - if ((*get_services().query_result->fetch_lengths)(mysql_res, &length) != 0) - throw std::runtime_error{"Couldn't fetch lenghts"}; + result->insert(std::string{row[0], length[0]}, + std::string{row[1], length[1]}); + } - return optional_string{std::in_place, row[0], length[0]}; + return result; } bool sql_context::execute(std::string_view query) { diff --git a/mysql-test/suite/component_masking_functions/r/dictionary_operations.result b/mysql-test/suite/component_masking_functions/r/dictionary_operations.result index 5b2ea684ffbf..4cff6e1e1547 100644 --- a/mysql-test/suite/component_masking_functions/r/dictionary_operations.result +++ b/mysql-test/suite/component_masking_functions/r/dictionary_operations.result @@ -5,9 +5,16 @@ # * masking_dictionary_term_add # * masking_dictionary_term_remove # * masking_dictionary_remove +# * masking_dictionaries_flush # 
INSTALL COMPONENT 'file://component_masking_functions'; -# arity checks +# +# Create users with and without MASKING_DICTIONARIES_ADMIN privilege +CREATE USER udftest_unpriv@localhost; +CREATE USER udftest_priv@localhost; +GRANT MASKING_DICTIONARIES_ADMIN ON *.* TO udftest_priv@localhost; +# +# arity checks, run for privileged user SELECT gen_dictionary(); ERROR HY000: Can't initialize function 'gen_dictionary'; Wrong argument list: gen_dictionary(string) SELECT gen_dictionary('', ''); @@ -21,21 +28,24 @@ ERROR HY000: Can't initialize function 'gen_blocklist'; Wrong argument list: gen SELECT gen_blocklist('', '', '', ''); ERROR HY000: Can't initialize function 'gen_blocklist'; Wrong argument list: gen_blocklist(string, string, string) SELECT masking_dictionary_term_add(); -ERROR HY000: Can't initialize function 'masking_dictionary_term_add'; Function requires MASKING_DICTIONARIES_ADMIN privilege +ERROR HY000: Can't initialize function 'masking_dictionary_term_add'; Wrong argument list: masking_dictionary_term_add(string, string) SELECT masking_dictionary_term_add(''); -ERROR HY000: Can't initialize function 'masking_dictionary_term_add'; Function requires MASKING_DICTIONARIES_ADMIN privilege +ERROR HY000: Can't initialize function 'masking_dictionary_term_add'; Wrong argument list: masking_dictionary_term_add(string, string) SELECT masking_dictionary_term_add('', '', ''); -ERROR HY000: Can't initialize function 'masking_dictionary_term_add'; Function requires MASKING_DICTIONARIES_ADMIN privilege +ERROR HY000: Can't initialize function 'masking_dictionary_term_add'; Wrong argument list: masking_dictionary_term_add(string, string) SELECT masking_dictionary_term_remove(); -ERROR HY000: Can't initialize function 'masking_dictionary_term_remove'; Function requires MASKING_DICTIONARIES_ADMIN privilege +ERROR HY000: Can't initialize function 'masking_dictionary_term_remove'; Wrong argument list: masking_dictionary_term_remove(string, string) SELECT 
masking_dictionary_term_remove(''); -ERROR HY000: Can't initialize function 'masking_dictionary_term_remove'; Function requires MASKING_DICTIONARIES_ADMIN privilege +ERROR HY000: Can't initialize function 'masking_dictionary_term_remove'; Wrong argument list: masking_dictionary_term_remove(string, string) SELECT masking_dictionary_term_remove('', '', ''); -ERROR HY000: Can't initialize function 'masking_dictionary_term_remove'; Function requires MASKING_DICTIONARIES_ADMIN privilege +ERROR HY000: Can't initialize function 'masking_dictionary_term_remove'; Wrong argument list: masking_dictionary_term_remove(string, string) SELECT masking_dictionary_remove(); -ERROR HY000: Can't initialize function 'masking_dictionary_remove'; Function requires MASKING_DICTIONARIES_ADMIN privilege +ERROR HY000: Can't initialize function 'masking_dictionary_remove'; Wrong argument list: masking_dictionary_remove(string) SELECT masking_dictionary_remove('', ''); -ERROR HY000: Can't initialize function 'masking_dictionary_remove'; Function requires MASKING_DICTIONARIES_ADMIN privilege +ERROR HY000: Can't initialize function 'masking_dictionary_remove'; Wrong argument list: masking_dictionary_remove(string) +SELECT masking_dictionaries_flush(''); +ERROR HY000: Can't initialize function 'masking_dictionaries_flush'; Wrong argument list: masking_dictionaries_flush() +# # argument nullness checks for functions not requiring MASKING_DICTIONARIES_ADMIN SELECT gen_dictionary(NULL); ERROR HY000: gen_dictionary UDF failed; argument 1 cannot be null @@ -43,13 +53,16 @@ SELECT gen_blocklist('Berlin', NULL, 'us_cities'); ERROR HY000: gen_blocklist UDF failed; argument 2 cannot be null SELECT gen_blocklist('Berlin', 'de_cities', NULL); ERROR HY000: gen_blocklist UDF failed; argument 3 cannot be null +# # checking the case when there is no mysql.masking_dictionaries table SELECT gen_blocklist('Berlin', 'de_cities', 'us_cities'); ERROR HY000: Error in command service backend interface, because of : 
"Table 'mysql.masking_dictionaries' doesn't exist" SELECT gen_dictionary('us_cities'); ERROR HY000: Error in command service backend interface, because of : "Table 'mysql.masking_dictionaries' doesn't exist" +# # NULL for NULL checks include/assert.inc [gen_blocklist() for the NULL primary argument should return NULL] +# # checking the case when mysql.masking_dictionaries has invalid structure CREATE TABLE mysql.masking_dictionaries( Dictionary VARCHAR(256) NOT NULL, @@ -60,14 +73,15 @@ ERROR HY000: Error in command service backend interface, because of : "Unknown c SELECT gen_dictionary('us_cities'); ERROR HY000: Error in command service backend interface, because of : "Unknown column 'Term' in 'field list'" DROP TABLE mysql.masking_dictionaries; +# # checks for an unprivileged user -CREATE USER udftest; SELECT masking_dictionary_term_add('single_dict', 'entry'); ERROR HY000: Can't initialize function 'masking_dictionary_term_add'; Function requires MASKING_DICTIONARIES_ADMIN privilege SELECT masking_dictionary_term_remove('single_dict', 'entry'); ERROR HY000: Can't initialize function 'masking_dictionary_term_remove'; Function requires MASKING_DICTIONARIES_ADMIN privilege SELECT masking_dictionary_remove('single_dict'); ERROR HY000: Can't initialize function 'masking_dictionary_remove'; Function requires MASKING_DICTIONARIES_ADMIN privilege +# # checking the case when mysql.masking_dictionaries is empty CREATE TABLE mysql.masking_dictionaries( Dictionary VARCHAR(256) NOT NULL, @@ -86,6 +100,9 @@ INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city3'); INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city4'); INSERT INTO mysql.masking_dictionaries VALUES('укр_міста', 'місто1'); INSERT INTO mysql.masking_dictionaries VALUES('укр_міста', 'місто2'); +SELECT masking_dictionaries_flush(); +masking_dictionaries_flush() +1 include/assert.inc [gen_dictionary on a non-existing dictionary must return NULL] SET @check_expression_result = 
gen_dictionary('us_cities'); include/assert.inc [the result of evaluating 'gen_dictionary('us_cities')' must match the 'city[[:digit:]]{1}' pattern] @@ -117,7 +134,10 @@ include/assert.inc [charset of the result of evaluating 'gen_blocklist('city1', include/assert.inc [collation of the result of evaluating 'gen_blocklist('city1', 'us_cities', 'укр_міста')' must be 'utf8mb4_0900_ai_ci'] include/assert.inc [gen_blocklist when to_dictionary does not exist must return NULL] DELETE FROM mysql.masking_dictionaries; -GRANT MASKING_DICTIONARIES_ADMIN ON *.* TO udftest; +SELECT masking_dictionaries_flush(); +masking_dictionaries_flush() +1 +# # argument nullness checks for functions requiring MASKING_DICTIONARIES_ADMIN SELECT masking_dictionary_term_add(NULL, 'entry'); ERROR HY000: masking_dictionary_term_add UDF failed; argument 1 cannot be null @@ -129,6 +149,7 @@ SELECT masking_dictionary_term_remove('single_dict', NULL); ERROR HY000: masking_dictionary_term_remove UDF failed; argument 2 cannot be null SELECT masking_dictionary_remove(NULL); ERROR HY000: masking_dictionary_remove UDF failed; argument 1 cannot be null +# # checks for a user with MASKING_DICTIONARIES_ADMIN privilege using various character sets / collations SET @regular_charset_list = '[ "utf8mb4", "utf8mb3", "utf16", "utf16le", "utf32", "ucs2", "koi8u"]'; SET @special_charset_list = '[ "latin2", "ascii", "binary"]'; @@ -4993,6 +5014,7 @@ include/assert.inc [charset of the result of evaluating 'masking_dictionary_remo include/assert.inc [collation of the result of evaluating 'masking_dictionary_remove('словник')' must be 'utf8mb4_0900_ai_ci'] include/assert.inc [mysql.masking_dictionaries must have 0 records] ################################ -DROP USER udftest; +DROP USER udftest_unpriv@localhost; +DROP USER udftest_priv@localhost; UNINSTALL COMPONENT 'file://component_masking_functions'; DROP TABLE mysql.masking_dictionaries; diff --git 
a/mysql-test/suite/component_masking_functions/t/dictionary_operations.test b/mysql-test/suite/component_masking_functions/t/dictionary_operations.test index db73a98b9ab7..da2dd0655c23 100644 --- a/mysql-test/suite/component_masking_functions/t/dictionary_operations.test +++ b/mysql-test/suite/component_masking_functions/t/dictionary_operations.test @@ -7,13 +7,26 @@ --echo # * masking_dictionary_term_add --echo # * masking_dictionary_term_remove --echo # * masking_dictionary_remove +--echo # * masking_dictionaries_flush --echo # --source include/count_sessions.inc INSTALL COMPONENT 'file://component_masking_functions'; ---echo # arity checks +--echo # +--echo # Create users with and without MASKING_DICTIONARIES_ADMIN privilege +CREATE USER udftest_unpriv@localhost; +CREATE USER udftest_priv@localhost; +GRANT MASKING_DICTIONARIES_ADMIN ON *.* TO udftest_priv@localhost; +--connect(con_unpriv,localhost,udftest_unpriv,,) +--connect(con_priv,localhost,udftest_priv,,) +--connection default + +--echo # +--echo # arity checks, run for privileged user +--connection con_priv + --error ER_CANT_INITIALIZE_UDF SELECT gen_dictionary(); --error ER_CANT_INITIALIZE_UDF @@ -47,8 +60,12 @@ SELECT masking_dictionary_remove(); --error ER_CANT_INITIALIZE_UDF SELECT masking_dictionary_remove('', ''); +--error ER_CANT_INITIALIZE_UDF +SELECT masking_dictionaries_flush(''); +--echo # --echo # argument nullness checks for functions not requiring MASKING_DICTIONARIES_ADMIN +--connection con_unpriv --error ER_UDF_ERROR SELECT gen_dictionary(NULL); @@ -58,6 +75,7 @@ SELECT gen_blocklist('Berlin', NULL, 'us_cities'); SELECT gen_blocklist('Berlin', 'de_cities', NULL); +--echo # --echo # checking the case when there is no mysql.masking_dictionaries table --error ER_COMMAND_SERVICE_BACKEND_FAILED SELECT gen_blocklist('Berlin', 'de_cities', 'us_cities'); @@ -66,17 +84,21 @@ SELECT gen_blocklist('Berlin', 'de_cities', 'us_cities'); SELECT gen_dictionary('us_cities'); +--echo # --echo # NULL for NULL 
checks --let $assert_cond = gen_blocklist(NULL, "de_cities", "us_cities") IS NULL --let $assert_text = gen_blocklist() for the NULL primary argument should return NULL --source include/assert.inc +--echo # --echo # checking the case when mysql.masking_dictionaries has invalid structure +--connection default CREATE TABLE mysql.masking_dictionaries( Dictionary VARCHAR(256) NOT NULL, UNIQUE INDEX dictionary_term_idx (Dictionary) ) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4; +--connection con_unpriv --error ER_COMMAND_SERVICE_BACKEND_FAILED SELECT gen_blocklist('Berlin', 'de_cities', 'us_cities'); @@ -84,13 +106,12 @@ SELECT gen_blocklist('Berlin', 'de_cities', 'us_cities'); --error ER_COMMAND_SERVICE_BACKEND_FAILED SELECT gen_dictionary('us_cities'); +--connection default DROP TABLE mysql.masking_dictionaries; - +--echo # --echo # checks for an unprivileged user -CREATE USER udftest; ---connect(con1,localhost,udftest,,) ---connection con1 +--connection con_unpriv --error ER_CANT_INITIALIZE_UDF SELECT masking_dictionary_term_add('single_dict', 'entry'); @@ -101,15 +122,16 @@ SELECT masking_dictionary_term_remove('single_dict', 'entry'); --error ER_CANT_INITIALIZE_UDF SELECT masking_dictionary_remove('single_dict'); ---connection default +--echo # --echo # checking the case when mysql.masking_dictionaries is empty +--connection default CREATE TABLE mysql.masking_dictionaries( Dictionary VARCHAR(256) NOT NULL, Term VARCHAR(256) NOT NULL, UNIQUE INDEX dictionary_term_idx (Dictionary, Term) ) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4; +--connection con_unpriv ---connection con1 --let $assert_cond = gen_dictionary("us_cities") IS NULL --let $assert_text = gen_dictionary on an empty table must return NULL --source include/assert.inc @@ -130,7 +152,10 @@ INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city4'); INSERT INTO mysql.masking_dictionaries VALUES('укр_міста', 'місто1'); INSERT INTO mysql.masking_dictionaries VALUES('укр_міста', 'місто2'); ---connection con1 
+--connection con_priv +SELECT masking_dictionaries_flush(); +--connection con_unpriv + --let $assert_cond = gen_dictionary("de_cities") IS NULL --let $assert_text = gen_dictionary on a non-existing dictionary must return NULL --source include/assert.inc @@ -182,15 +207,11 @@ INSERT INTO mysql.masking_dictionaries VALUES('укр_міста', 'місто2' --source include/assert.inc --connection default ---disconnect con1 - DELETE FROM mysql.masking_dictionaries; +--connection con_priv +SELECT masking_dictionaries_flush(); -GRANT MASKING_DICTIONARIES_ADMIN ON *.* TO udftest; - ---connect(con1,localhost,udftest,,) ---connection con1 - +--echo # --echo # argument nullness checks for functions requiring MASKING_DICTIONARIES_ADMIN --error ER_UDF_ERROR SELECT masking_dictionary_term_add(NULL, 'entry'); @@ -206,6 +227,7 @@ SELECT masking_dictionary_term_remove('single_dict', NULL); SELECT masking_dictionary_remove(NULL); +--echo # --echo # checks for a user with MASKING_DICTIONARIES_ADMIN privilege using various character sets / collations --let $dollar = `SELECT _utf8mb4 0x24` @@ -269,7 +291,7 @@ while($i < $number_of_charsets) { --source include/assert.inc - --connection con1 + --connection con_priv --let $expected_charset = utf8mb4 --let $iteration = 0 while ($iteration < $number_of_iterations) { @@ -338,7 +360,7 @@ while($i < $number_of_charsets) { --source include/assert.inc - --connection con1 + --connection con_priv --echo masking_dictionary_remove checks --let $dictionary = dictionary --let $expression = masking_dictionary_remove('$dictionary') @@ -356,7 +378,7 @@ while($i < $number_of_charsets) { --let $assert_text = mysql.masking_dictionaries must have 3 records --source include/assert.inc - --connection con1 + --connection con_priv --let $dictionary = словник --let $expression = masking_dictionary_remove('$dictionary') --let $regexp = @@ -369,16 +391,18 @@ while($i < $number_of_charsets) { --let $assert_text = mysql.masking_dictionaries must have 0 records --source 
include/assert.inc - --connection con1 + --connection con_priv --echo ################################ --inc $i } --connection default ---disconnect con1 +--disconnect con_priv +--disconnect con_unpriv -DROP USER udftest; +DROP USER udftest_unpriv@localhost; +DROP USER udftest_priv@localhost; UNINSTALL COMPONENT 'file://component_masking_functions'; From 15cfaae2aa1af58d69d8f77a0ea2684991319533 Mon Sep 17 00:00:00 2001 From: Oleksandr Kachan Date: Fri, 29 Mar 2024 19:17:14 +0200 Subject: [PATCH 2/8] PS-9148: Add masking_functions.masking_database sys var support https://perconadev.atlassian.net/browse/PS-9148 The masking_functions.masking_database system variable for the masking_functions component specifies database used for data masking dictionaries. --- components/masking_functions/CMakeLists.txt | 2 + .../masking_functions/query_builder.hpp | 7 +- .../include/masking_functions/sys_vars.hpp | 31 +++++++ .../masking_functions/src/component.cpp | 20 +++++ .../src/masking_functions/sys_vars.cpp | 89 +++++++++++++++++++ .../r/sys_var_masking_database.result | 25 ++++++ .../t/sys_var_masking_database.test | 51 +++++++++++ 7 files changed, 222 insertions(+), 3 deletions(-) create mode 100644 components/masking_functions/include/masking_functions/sys_vars.hpp create mode 100644 components/masking_functions/src/masking_functions/sys_vars.cpp create mode 100644 mysql-test/suite/component_masking_functions/r/sys_var_masking_database.result create mode 100644 mysql-test/suite/component_masking_functions/t/sys_var_masking_database.test diff --git a/components/masking_functions/CMakeLists.txt b/components/masking_functions/CMakeLists.txt index 891c708274a9..9f8ad21658b0 100644 --- a/components/masking_functions/CMakeLists.txt +++ b/components/masking_functions/CMakeLists.txt @@ -35,6 +35,7 @@ set(DATAMASKING_SOURCES src/masking_functions/registration_routines.cpp src/masking_functions/sql_context.cpp src/masking_functions/sql_escape_functions.cpp + 
src/masking_functions/sys_vars.cpp include/masking_functions/charset_string_fwd.hpp include/masking_functions/charset_string.hpp @@ -51,6 +52,7 @@ set(DATAMASKING_SOURCES include/masking_functions/sql_escape_functions.hpp include/masking_functions/string_service_tuple_fwd.hpp include/masking_functions/string_service_tuple.hpp + include/masking_functions/sys_vars.hpp ) ### Configuration ### diff --git a/components/masking_functions/include/masking_functions/query_builder.hpp b/components/masking_functions/include/masking_functions/query_builder.hpp index 3aa52c08e22f..6d6629914597 100644 --- a/components/masking_functions/include/masking_functions/query_builder.hpp +++ b/components/masking_functions/include/masking_functions/query_builder.hpp @@ -16,6 +16,8 @@ #ifndef MASKING_FUNCTIONS_QUERY_BUILDER_HPP #define MASKING_FUNCTIONS_QUERY_BUILDER_HPP +#include "masking_functions/sys_vars.hpp" + #include #include @@ -27,14 +29,13 @@ class query_builder { public: static constexpr std::string_view default_result_character_set = "utf8mb4"; - static constexpr std::string_view default_database_name = "mysql"; static constexpr std::string_view default_table_name = "masking_dictionaries"; static constexpr std::string_view default_dictionary_field_name = "Dictionary"; static constexpr std::string_view default_term_field_name = "Term"; - query_builder( - std::string_view database_name = default_database_name, + explicit query_builder( + std::string_view database_name = sys_vars::get_dict_database_name(), std::string_view table_name = default_table_name, std::string_view dictionary_field_name = default_dictionary_field_name, std::string_view term_field_name = default_term_field_name) diff --git a/components/masking_functions/include/masking_functions/sys_vars.hpp b/components/masking_functions/include/masking_functions/sys_vars.hpp new file mode 100644 index 000000000000..45a80992ef2f --- /dev/null +++ b/components/masking_functions/include/masking_functions/sys_vars.hpp @@ -0,0 
+1,31 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MASKING_FUNCTIONS_SYS_VARS_HPP +#define MASKING_FUNCTIONS_SYS_VARS_HPP + +#include + +namespace masking_functions::sys_vars { + +std::string_view get_dict_database_name() noexcept; + +bool register_sys_vars(); +bool unregister_sys_vars(); +bool validate(); + +} // namespace masking_functions::sys_vars + +#endif // MASKING_FUNCTIONS_SYS_VARS_HPP diff --git a/components/masking_functions/src/component.cpp b/components/masking_functions/src/component.cpp index 7650d571a899..d37e24084934 100644 --- a/components/masking_functions/src/component.cpp +++ b/components/masking_functions/src/component.cpp @@ -18,6 +18,7 @@ #include +#include #include #include #include @@ -36,6 +37,7 @@ #include "masking_functions/primitive_singleton.hpp" #include "masking_functions/registration_routines.hpp" #include "masking_functions/string_service_tuple.hpp" +#include "masking_functions/sys_vars.hpp" // defined as a macro because needed both raw and stringized #define CURRENT_COMPONENT_NAME masking_functions @@ -65,6 +67,8 @@ REQUIRES_SERVICE_PLACEHOLDER(mysql_udf_metadata); REQUIRES_SERVICE_PLACEHOLDER(mysql_current_thread_reader); REQUIRES_SERVICE_PLACEHOLDER(mysql_thd_security_context); REQUIRES_SERVICE_PLACEHOLDER(global_grants_check); 
+REQUIRES_SERVICE_PLACEHOLDER(component_sys_variable_register); +REQUIRES_SERVICE_PLACEHOLDER(component_sys_variable_unregister); REQUIRES_SERVICE_PLACEHOLDER(log_builtins); REQUIRES_SERVICE_PLACEHOLDER(log_builtins_string); @@ -125,6 +129,14 @@ static mysql_service_status_t component_init() { return 1; } + if (!masking_functions::sys_vars::register_sys_vars() || + !masking_functions::sys_vars::validate()) { + LogComponentErr(ERROR_LEVEL, ER_LOG_PRINTF_MSG, + "Cannot register system variables"); + component_deinit(); + return 1; + } + if (!masking_functions::register_udfs()) { LogComponentErr(ERROR_LEVEL, ER_LOG_PRINTF_MSG, "Cannot register UDFs"); component_deinit(); @@ -149,6 +161,12 @@ static mysql_service_status_t component_deinit() { result = 1; } + if (!masking_functions::sys_vars::unregister_sys_vars()) { + LogComponentErr(ERROR_LEVEL, ER_LOG_PRINTF_MSG, + "Cannot unregister system variables"); + result = 1; + } + if (result == 0) { LogComponentErr(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "Component successfully deinitialized"); @@ -187,6 +205,8 @@ BEGIN_COMPONENT_REQUIRES(CURRENT_COMPONENT_NAME) REQUIRES_SERVICE(mysql_current_thread_reader), REQUIRES_SERVICE(mysql_thd_security_context), REQUIRES_SERVICE(global_grants_check), + REQUIRES_SERVICE(component_sys_variable_register), + REQUIRES_SERVICE(component_sys_variable_unregister), REQUIRES_SERVICE(log_builtins), REQUIRES_SERVICE(log_builtins_string), diff --git a/components/masking_functions/src/masking_functions/sys_vars.cpp b/components/masking_functions/src/masking_functions/sys_vars.cpp new file mode 100644 index 000000000000..e8a44a7dbb58 --- /dev/null +++ b/components/masking_functions/src/masking_functions/sys_vars.cpp @@ -0,0 +1,89 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#include "masking_functions/sys_vars.hpp" + +#include +#include +#include + +#include + +#include +#include + +extern REQUIRES_SERVICE_PLACEHOLDER(component_sys_variable_register); +extern REQUIRES_SERVICE_PLACEHOLDER(component_sys_variable_unregister); +extern REQUIRES_SERVICE_PLACEHOLDER(log_builtins); + +namespace masking_functions::sys_vars { +namespace { + +using str_arg_check_type = STR_CHECK_ARG(str); + +constexpr std::string_view component_name{"masking_functions"}; +constexpr std::string_view masking_database_var_name{"masking_database"}; + +std::string default_database_name{"mysql"}; + +bool is_database_name_initialised = false; + +char *database_name; + +} // namespace + +// Returns the configured dictionary database name. Yields an empty view while +// the variable storage is still null, because constructing a std::string_view +// from a null pointer is undefined behaviour. +std::string_view get_dict_database_name() noexcept { + return database_name != nullptr ? std::string_view{database_name} + : std::string_view{}; +} + +// Registers the read-only masking_database string variable of the +// masking_functions component, passing default_database_name as the check +// argument. Returns false when the registration service reports a failure. +bool register_sys_vars() { + str_arg_check_type check_db_name{default_database_name.data()}; + + if (mysql_service_component_sys_variable_register->register_variable( + component_name.data(), masking_database_var_name.data(), + PLUGIN_VAR_STR | PLUGIN_VAR_MEMALLOC | PLUGIN_VAR_RQCMDARG | + PLUGIN_VAR_READONLY, + "Specifies the database to use for data masking dictionaries " + "at server startup.", + nullptr, nullptr, static_cast<void *>(&check_db_name), + static_cast<void *>(&database_name)) != 0) { + return false; + } + // Remembered so that unregister_sys_vars() only unregisters what exists. + is_database_name_initialised = true; + + return true; +} +
+bool unregister_sys_vars() { + bool is_success = true; + + if (is_database_name_initialised && + mysql_service_component_sys_variable_unregister->unregister_variable( + component_name.data(), masking_database_var_name.data()) != 0) { + is_success = false; + } + + return is_success; +} + +bool validate() { + if (database_name == nullptr || strlen(database_name) == 0) { + LogComponentErr(ERROR_LEVEL, ER_LOG_PRINTF_MSG, + "Bad masking_functions.masking_database value"); + return false; + } + + return true; +} + +} // namespace masking_functions::sys_vars diff --git a/mysql-test/suite/component_masking_functions/r/sys_var_masking_database.result b/mysql-test/suite/component_masking_functions/r/sys_var_masking_database.result new file mode 100644 index 000000000000..39c939c38fb9 --- /dev/null +++ b/mysql-test/suite/component_masking_functions/r/sys_var_masking_database.result @@ -0,0 +1,25 @@ +INSTALL COMPONENT 'file://component_masking_functions'; +SET GLOBAL masking_functions.masking_database=dict_db; +ERROR HY000: Variable 'masking_functions.masking_database' is a read only variable +SET SESSION masking_functions.masking_database=dict_db; +ERROR HY000: Variable 'masking_functions.masking_database' is a read only variable +# restart: +include/assert_grep.inc [Bad masking_functions.masking_database value] +CREATE DATABASE dict_db; +CREATE TABLE dict_db.masking_dictionaries( +Dictionary VARCHAR(256) NOT NULL, +Term VARCHAR(256) NOT NULL, +UNIQUE INDEX dictionary_term_idx (Dictionary, Term) +) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4; +# restart: +CREATE USER udftest_priv@localhost; +GRANT MASKING_DICTIONARIES_ADMIN ON *.* TO udftest_priv@localhost; +SELECT masking_dictionary_term_add('single_dict', 'entry'); +masking_dictionary_term_add('single_dict', 'entry') +1 +SELECT gen_dictionary('single_dict'); +gen_dictionary('single_dict') +entry +UNINSTALL COMPONENT 'file://component_masking_functions'; +DROP DATABASE dict_db; +DROP USER udftest_priv@localhost; diff --git 
a/mysql-test/suite/component_masking_functions/t/sys_var_masking_database.test b/mysql-test/suite/component_masking_functions/t/sys_var_masking_database.test new file mode 100644 index 000000000000..09a7bf1d4a8c --- /dev/null +++ b/mysql-test/suite/component_masking_functions/t/sys_var_masking_database.test @@ -0,0 +1,51 @@ +--source include/have_masking_functions_component.inc +--source include/count_sessions.inc + +INSTALL COMPONENT 'file://component_masking_functions'; + +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET GLOBAL masking_functions.masking_database=dict_db; + +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET SESSION masking_functions.masking_database=dict_db; + +# Empty DB name +--let $error_log_file = $MYSQLTEST_VARDIR/tmp/masking_functions_error.err +--let $do_not_echo_parameters = 1 +--let $restart_parameters="restart: --log-error=$error_log_file --masking-functions.masking-database=''" +--source include/restart_mysqld.inc + +--let $assert_text = Bad masking_functions.masking_database value +--let $assert_file = $error_log_file +--let $assert_select = Bad masking_functions.masking_database value +--let $assert_count = 1 +--source include/assert_grep.inc + +CREATE DATABASE dict_db; +CREATE TABLE dict_db.masking_dictionaries( + Dictionary VARCHAR(256) NOT NULL, + Term VARCHAR(256) NOT NULL, + UNIQUE INDEX dictionary_term_idx (Dictionary, Term) +) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4; + +--let $restart_parameters="restart: --log-error=$error_log_file --masking-functions.masking-database='dict_db'" +--source include/restart_mysqld.inc + +CREATE USER udftest_priv@localhost; +GRANT MASKING_DICTIONARIES_ADMIN ON *.* TO udftest_priv@localhost; +--connect(con_priv,localhost,udftest_priv,,) + +SELECT masking_dictionary_term_add('single_dict', 'entry'); +SELECT gen_dictionary('single_dict'); + +--connection default +--disconnect con_priv + +# +# Cleanup +UNINSTALL COMPONENT 'file://component_masking_functions'; +DROP DATABASE dict_db; +DROP USER 
udftest_priv@localhost; + +--remove_file $error_log_file +--source include/wait_until_count_sessions.inc From 58517cfdd82c31c741bcfe234c4c770b7da54268 Mon Sep 17 00:00:00 2001 From: Oleksandr Kachan Date: Mon, 8 Apr 2024 12:15:48 +0300 Subject: [PATCH 3/8] PS-9148: Implement dictionary flusher for masking_functions component https://perconadev.atlassian.net/browse/PS-9148 - Added the masking_functions.dictionaries_flush_interval_seconds system variable. - Added the flusher thread itself. It periodically rereads the content of the dictionary table and updates the in-memory cache. --- .../include/masking_functions/query_cache.hpp | 26 +++++ .../include/masking_functions/sys_vars.hpp | 3 + .../masking_functions/src/component.cpp | 5 + .../src/masking_functions/query_cache.cpp | 105 +++++++++++++++++- .../src/masking_functions/sys_vars.cpp | 34 ++++++ .../r/rpl_dictionaries_flush_interval.result | 70 ++++++++++++ ...naries_flush_interval_seconds_basic.result | 31 ++++++ ...rpl_dictionaries_flush_interval-master.opt | 2 + .../rpl_dictionaries_flush_interval-slave.opt | 2 + .../t/rpl_dictionaries_flush_interval.test | 67 +++++++++++ ...ionaries_flush_interval_seconds_basic.test | 47 ++++++++ 11 files changed, 391 insertions(+), 1 deletion(-) create mode 100644 mysql-test/suite/component_masking_functions/r/rpl_dictionaries_flush_interval.result create mode 100644 mysql-test/suite/component_masking_functions/r/sys_var_dictionaries_flush_interval_seconds_basic.result create mode 100644 mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval-master.opt create mode 100644 mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval-slave.opt create mode 100644 mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval.test create mode 100644 mysql-test/suite/component_masking_functions/t/sys_var_dictionaries_flush_interval_seconds_basic.test diff --git a/components/masking_functions/include/masking_functions/query_cache.hpp
b/components/masking_functions/include/masking_functions/query_cache.hpp index 592d812f6de1..809a6db8578b 100644 --- a/components/masking_functions/include/masking_functions/query_cache.hpp +++ b/components/masking_functions/include/masking_functions/query_cache.hpp @@ -16,9 +16,16 @@ #ifndef MASKING_FUNCTIONS_QUERY_CACHE_HPP #define MASKING_FUNCTIONS_QUERY_CACHE_HPP +#include +#include +#include +#include #include #include +#include +#include + #include "masking_functions/dictionary_container.hpp" namespace masking_functions { @@ -26,6 +33,11 @@ namespace masking_functions { class query_cache { public: query_cache(); + query_cache(query_cache &other) = delete; + query_cache(query_cache &&other) = delete; + query_cache &operator=(query_cache &other) = delete; + query_cache &operator=(query_cache &&other) = delete; + ~query_cache(); bool contains(const std::string &dictionary_name, const std::string &term) const; @@ -35,9 +47,23 @@ class query_cache { bool insert(const std::string &dictionary_name, const std::string &term); bool load_cache(); + void init_thd() noexcept; + void release_thd() noexcept; + void dict_flusher() noexcept; + private: mutable std::shared_mutex m_dict_mut; dictionary_container m_dict_cache; + + ulonglong m_flusher_interval_seconds; + std::atomic m_is_flusher_stopped; + std::mutex m_flusher_mutex; + std::condition_variable m_flusher_condition_var; + + PSI_thread_key m_psi_flusher_thread_key; + my_thread_handle m_flusher_thread; + my_thread_attr_t m_flusher_thread_attr; + std::unique_ptr m_flusher_thd; }; } // namespace masking_functions diff --git a/components/masking_functions/include/masking_functions/sys_vars.hpp b/components/masking_functions/include/masking_functions/sys_vars.hpp index 45a80992ef2f..599773df3816 100644 --- a/components/masking_functions/include/masking_functions/sys_vars.hpp +++ b/components/masking_functions/include/masking_functions/sys_vars.hpp @@ -16,11 +16,14 @@ #ifndef MASKING_FUNCTIONS_SYS_VARS_HPP #define 
MASKING_FUNCTIONS_SYS_VARS_HPP +#include "my_inttypes.h" + #include namespace masking_functions::sys_vars { std::string_view get_dict_database_name() noexcept; +ulonglong get_flush_interval_seconds() noexcept; bool register_sys_vars(); bool unregister_sys_vars(); diff --git a/components/masking_functions/src/component.cpp b/components/masking_functions/src/component.cpp index d37e24084934..d83b85de5202 100644 --- a/components/masking_functions/src/component.cpp +++ b/components/masking_functions/src/component.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,8 @@ REQUIRES_SERVICE_PLACEHOLDER(mysql_command_query_result); REQUIRES_SERVICE_PLACEHOLDER(mysql_command_options); REQUIRES_SERVICE_PLACEHOLDER(mysql_command_factory); +REQUIRES_PSI_THREAD_SERVICE_PLACEHOLDER; + REQUIRES_SERVICE_PLACEHOLDER(udf_registration); REQUIRES_SERVICE_PLACEHOLDER(dynamic_privilege_register); @@ -192,6 +195,8 @@ BEGIN_COMPONENT_REQUIRES(CURRENT_COMPONENT_NAME) REQUIRES_SERVICE(mysql_string_substr), REQUIRES_SERVICE(mysql_string_compare), + REQUIRES_PSI_THREAD_SERVICE, + REQUIRES_SERVICE(mysql_command_query), REQUIRES_SERVICE(mysql_command_query_result), REQUIRES_SERVICE(mysql_command_options), diff --git a/components/masking_functions/src/masking_functions/query_cache.cpp b/components/masking_functions/src/masking_functions/query_cache.cpp index 7107c728b265..a19219e9201e 100644 --- a/components/masking_functions/src/masking_functions/query_cache.cpp +++ b/components/masking_functions/src/masking_functions/query_cache.cpp @@ -19,18 +19,121 @@ #include "masking_functions/primitive_singleton.hpp" #include "masking_functions/query_builder.hpp" #include "masking_functions/sql_context.hpp" +#include "masking_functions/sys_vars.hpp" + +#include +#include +#include +#include +#include + +#include +#include + +extern REQUIRES_SERVICE_PLACEHOLDER(log_builtins); namespace masking_functions { namespace { +constexpr std::string_view 
psi_category_name{"masking_functions"}; +constexpr std::string_view flusher_thd_psi_name{ + "masking_functions_dict_flusher"}; +constexpr std::string_view flusher_thd_psi_os_name{"mf_flusher"}; + using global_command_services = masking_functions::primitive_singleton< masking_functions::command_service_tuple>; using global_query_builder = masking_functions::primitive_singleton; +// Thread entry point: `arg` is the owning query_cache handed to +// mysql_thread_create() by the constructor. +void *run_dict_flusher(void *arg) { + auto *self = static_cast<query_cache *>(arg); + self->init_thd(); + self->dict_flusher(); + self->release_thd(); + return nullptr; +} + } // namespace -query_cache::query_cache() { load_cache(); } +// Loads the cache once and, when the flush interval is non-zero, starts the +// background flusher thread. +query_cache::query_cache() + : m_flusher_interval_seconds{sys_vars::get_flush_interval_seconds()}, + m_is_flusher_stopped{true} { + load_cache(); + + if (m_flusher_interval_seconds > 0) { + PSI_thread_info thread_info{&m_psi_flusher_thread_key, + flusher_thd_psi_name.data(), + flusher_thd_psi_os_name.data(), + PSI_FLAG_SINGLETON, + 0, + PSI_DOCUMENT_ME}; + mysql_thread_register(psi_category_name.data(), &thread_info, 1); + + // The attribute object must be initialised before it is passed to the + // thread creation call. + my_thread_attr_init(&m_flusher_thread_attr); + + // Publish the running state before the thread starts: the flusher loop + // exits immediately if it still observes the flag as 'stopped'. + m_is_flusher_stopped = false; + const auto res = + mysql_thread_create(m_psi_flusher_thread_key, &m_flusher_thread, + &m_flusher_thread_attr, run_dict_flusher, this); + my_thread_attr_destroy(&m_flusher_thread_attr); + + if (res != 0) { + // No thread exists, so the destructor must not try to join one. + m_is_flusher_stopped = true; + LogComponentErr(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, + "Cannot initialize dictionary flusher"); + } + } +} + +// Stops the flusher thread and waits for it to finish, because the thread +// keeps using members of this object until it returns. +query_cache::~query_cache() { + if (!m_is_flusher_stopped) { + { + // Flip the flag under the waiter's mutex so the wake-up cannot be lost + // between the flusher's predicate check and its wait. + std::lock_guard<std::mutex> lock{m_flusher_mutex}; + m_is_flusher_stopped = true; + } + m_flusher_condition_var.notify_one(); + my_thread_join(&m_flusher_thread, nullptr); + } +} + +// Creates the THD used by the flusher thread and stores it in m_flusher_thd. +void query_cache::init_thd() noexcept { + auto *thd = new THD; + my_thread_init(); + thd->set_new_thread_id(); + thd->thread_stack = reinterpret_cast<char *>(&thd); + thd->store_globals(); + m_flusher_thd.reset(thd); +} + +void query_cache::release_thd() noexcept { my_thread_end(); } + +void query_cache::dict_flusher() noexcept { +#ifdef HAVE_PSI_THREAD_INTERFACE + { + struct PSI_thread *psi = m_flusher_thd->get_psi(); + PSI_THREAD_CALL(set_thread_id)(psi, m_flusher_thd->thread_id()); +
PSI_THREAD_CALL(set_thread_THD)(psi, m_flusher_thd.get()); + PSI_THREAD_CALL(set_thread_command)(m_flusher_thd->get_command()); + PSI_THREAD_CALL(set_thread_info) + (STRING_WITH_LEN("Masking functions component cache flusher")); + } +#endif + + while (!m_is_flusher_stopped) { + std::unique_lock lock{m_flusher_mutex}; + const auto wait_started_at = std::chrono::system_clock::now(); + m_flusher_condition_var.wait_for( + lock, std::chrono::seconds{m_flusher_interval_seconds}, + [this, wait_started_at] { + return std::chrono::duration_cast( + std::chrono::system_clock::now() - wait_started_at) >= + std::chrono::seconds{m_flusher_interval_seconds} || + m_is_flusher_stopped.load(); + }); + + if (!m_is_flusher_stopped) { + load_cache(); + + DBUG_EXECUTE_IF("masking_functions_signal_on_cache_reload", { + const char act[] = "now SIGNAL masking_functions_cache_reload_done"; + assert(!debug_sync_set_action(current_thd, STRING_WITH_LEN(act))); + };); + } + } +} bool query_cache::load_cache() { auto query = global_query_builder::instance().select_all_from_dictionary(); diff --git a/components/masking_functions/src/masking_functions/sys_vars.cpp b/components/masking_functions/src/masking_functions/sys_vars.cpp index e8a44a7dbb58..3b4a02d3f313 100644 --- a/components/masking_functions/src/masking_functions/sys_vars.cpp +++ b/components/masking_functions/src/masking_functions/sys_vars.cpp @@ -21,6 +21,7 @@ #include +#include #include #include @@ -32,20 +33,30 @@ namespace masking_functions::sys_vars { namespace { using str_arg_check_type = STR_CHECK_ARG(str); +using ulonglong_arg_check_type = INTEGRAL_CHECK_ARG(ulonglong); constexpr std::string_view component_name{"masking_functions"}; constexpr std::string_view masking_database_var_name{"masking_database"}; +constexpr std::string_view flush_interval_var_name{ + "dictionaries_flush_interval_seconds"}; std::string default_database_name{"mysql"}; +const ulonglong default_flush_interval_seconds = 0; bool is_database_name_initialised 
= false; +bool is_flush_interval_initialised = false; char *database_name; +ulonglong flush_interval_seconds = 0; } // namespace std::string_view get_dict_database_name() noexcept { return database_name; } +ulonglong get_flush_interval_seconds() noexcept { + return flush_interval_seconds; +} + bool register_sys_vars() { str_arg_check_type check_db_name{default_database_name.data()}; @@ -61,6 +72,23 @@ bool register_sys_vars() { } is_database_name_initialised = true; + ulonglong_arg_check_type check_flush_interval{default_flush_interval_seconds, + 0, ULLONG_MAX, 1}; + + if (mysql_service_component_sys_variable_register->register_variable( + component_name.data(), flush_interval_var_name.data(), + PLUGIN_VAR_LONGLONG | PLUGIN_VAR_UNSIGNED | PLUGIN_VAR_RQCMDARG | + PLUGIN_VAR_READONLY, + "Sets the interval, in seconds, to wait before attempting to " + "schedule another flush of the data masking dictionaries table to " + "the memory data masking dictionaries cache following a restart or " + "previous execution.", + nullptr, nullptr, static_cast(&check_flush_interval), + static_cast(&flush_interval_seconds)) != 0) { + return false; + } + is_flush_interval_initialised = true; + return true; } @@ -73,6 +101,12 @@ bool unregister_sys_vars() { is_success = false; } + if (is_flush_interval_initialised && + mysql_service_component_sys_variable_unregister->unregister_variable( + component_name.data(), flush_interval_var_name.data()) != 0) { + is_success = false; + } + return is_success; } diff --git a/mysql-test/suite/component_masking_functions/r/rpl_dictionaries_flush_interval.result b/mysql-test/suite/component_masking_functions/r/rpl_dictionaries_flush_interval.result new file mode 100644 index 000000000000..1961728b739f --- /dev/null +++ b/mysql-test/suite/component_masking_functions/r/rpl_dictionaries_flush_interval.result @@ -0,0 +1,70 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. 
+Note #### Storing MySQL user name or password information in the connection metadata repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START REPLICA; see the 'START REPLICA Syntax' in the MySQL Manual for more information. +[connection master] +[connection master] +SET GLOBAL DEBUG='+d, masking_functions_signal_on_cache_reload'; +INSTALL COMPONENT 'file://component_masking_functions'; +[connection slave] +SET GLOBAL DEBUG='+d, masking_functions_signal_on_cache_reload'; +INSTALL COMPONENT 'file://component_masking_functions'; +[connection master] +CREATE TABLE mysql.masking_dictionaries( +Dictionary VARCHAR(256) NOT NULL, +Term VARCHAR(256) NOT NULL, +UNIQUE INDEX dictionary_term_idx (Dictionary, Term) +) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4; +CREATE USER udftest_priv@localhost; +GRANT MASKING_DICTIONARIES_ADMIN ON *.* TO udftest_priv@localhost; +SELECT masking_dictionary_term_add('single_dict_1', 'entry_1'); +masking_dictionary_term_add('single_dict_1', 'entry_1') +1 +SELECT masking_dictionary_term_add('single_dict_2', 'entry_2'); +masking_dictionary_term_add('single_dict_2', 'entry_2') +1 +SELECT gen_dictionary('single_dict_1'); +gen_dictionary('single_dict_1') +entry_1 +SELECT gen_dictionary('single_dict_2'); +gen_dictionary('single_dict_2') +entry_2 +include/rpl_sync.inc +[connection slave] +SELECT * FROM mysql.masking_dictionaries; +Dictionary Term +single_dict_1 entry_1 +single_dict_2 entry_2 +SELECT gen_dictionary('single_dict_1'); +gen_dictionary('single_dict_1') +entry_1 +SELECT gen_dictionary('single_dict_2'); +gen_dictionary('single_dict_2') +entry_2 +[connection master] +INSERT INTO mysql.masking_dictionaries VALUES ('single_dict_3', 'entry_3'); +SET DEBUG_SYNC='now WAIT_FOR masking_functions_cache_reload_done'; +SELECT gen_dictionary('single_dict_3'); +gen_dictionary('single_dict_3') +entry_3 +include/rpl_sync.inc +[connection slave] +SELECT * FROM 
mysql.masking_dictionaries; +Dictionary Term +single_dict_1 entry_1 +single_dict_2 entry_2 +single_dict_3 entry_3 +SET DEBUG_SYNC='now WAIT_FOR masking_functions_cache_reload_done'; +SELECT gen_dictionary('single_dict_3'); +gen_dictionary('single_dict_3') +entry_3 +[connection slave] +SET GLOBAL DEBUG='-d, masking_functions_signal_on_cache_reload'; +UNINSTALL COMPONENT 'file://component_masking_functions'; +[connection master] +SET GLOBAL DEBUG='-d, masking_functions_signal_on_cache_reload'; +UNINSTALL COMPONENT 'file://component_masking_functions'; +DROP USER udftest_priv@localhost; +DROP TABLE mysql.masking_dictionaries; +include/rpl_sync.inc +include/rpl_end.inc diff --git a/mysql-test/suite/component_masking_functions/r/sys_var_dictionaries_flush_interval_seconds_basic.result b/mysql-test/suite/component_masking_functions/r/sys_var_dictionaries_flush_interval_seconds_basic.result new file mode 100644 index 000000000000..415f35602080 --- /dev/null +++ b/mysql-test/suite/component_masking_functions/r/sys_var_dictionaries_flush_interval_seconds_basic.result @@ -0,0 +1,31 @@ +INSTALL COMPONENT 'file://component_masking_functions'; +SELECT @@global.masking_functions.dictionaries_flush_interval_seconds; +@@global.masking_functions.dictionaries_flush_interval_seconds +0 +SELECT NAME FROM performance_schema.threads WHERE NAME LIKE "%masking_functions%"; +NAME +SET GLOBAL masking_functions.dictionaries_flush_interval_seconds=100; +ERROR HY000: Variable 'masking_functions.dictionaries_flush_interval_seconds' is a read only variable +SET SESSION masking_functions.dictionaries_flush_interval_seconds=100; +ERROR HY000: Variable 'masking_functions.dictionaries_flush_interval_seconds' is a read only variable +# restart: --masking-functions.dictionaries-flush-interval-seconds=100 +SELECT @@global.masking_functions.dictionaries_flush_interval_seconds; +@@global.masking_functions.dictionaries_flush_interval_seconds +100 +CREATE TABLE mysql.masking_dictionaries( +Dictionary 
VARCHAR(256) NOT NULL, +Term VARCHAR(256) NOT NULL, +UNIQUE INDEX dictionary_term_idx (Dictionary, Term) +) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4; +CREATE USER udftest_priv@localhost; +GRANT MASKING_DICTIONARIES_ADMIN ON *.* TO udftest_priv@localhost; +SELECT masking_dictionary_term_add('single_dict_1', 'entry_1'); +masking_dictionary_term_add('single_dict_1', 'entry_1') +1 +SELECT NAME FROM performance_schema.threads WHERE NAME LIKE "%masking_functions%"; +NAME +thread/masking_functions/masking_functions_dict_flusher +UNINSTALL COMPONENT 'file://component_masking_functions'; +DROP USER udftest_priv@localhost; +DROP TABLE mysql.masking_dictionaries; +# restart: diff --git a/mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval-master.opt b/mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval-master.opt new file mode 100644 index 000000000000..414e47ef180a --- /dev/null +++ b/mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval-master.opt @@ -0,0 +1,2 @@ +$MASKING_FUNCTIONS_COMPONENT_OPT +--loose-masking_functions.dictionaries_flush_interval_seconds=1 diff --git a/mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval-slave.opt b/mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval-slave.opt new file mode 100644 index 000000000000..414e47ef180a --- /dev/null +++ b/mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval-slave.opt @@ -0,0 +1,2 @@ +$MASKING_FUNCTIONS_COMPONENT_OPT +--loose-masking_functions.dictionaries_flush_interval_seconds=1 diff --git a/mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval.test b/mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval.test new file mode 100644 index 000000000000..4449423814d4 --- /dev/null +++ b/mysql-test/suite/component_masking_functions/t/rpl_dictionaries_flush_interval.test @@ -0,0 +1,67 @@ +--source 
include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_masking_functions_component.inc +--source include/master-slave.inc + +--source include/rpl_connection_master.inc +SET GLOBAL DEBUG='+d, masking_functions_signal_on_cache_reload'; +INSTALL COMPONENT 'file://component_masking_functions'; +--source include/rpl_connection_slave.inc +SET GLOBAL DEBUG='+d, masking_functions_signal_on_cache_reload'; +INSTALL COMPONENT 'file://component_masking_functions'; + +--source include/rpl_connection_master.inc +CREATE TABLE mysql.masking_dictionaries( + Dictionary VARCHAR(256) NOT NULL, + Term VARCHAR(256) NOT NULL, + UNIQUE INDEX dictionary_term_idx (Dictionary, Term) +) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4; + +CREATE USER udftest_priv@localhost; +GRANT MASKING_DICTIONARIES_ADMIN ON *.* TO udftest_priv@localhost; +--connect(con_priv,localhost,udftest_priv,,) + +SELECT masking_dictionary_term_add('single_dict_1', 'entry_1'); +SELECT masking_dictionary_term_add('single_dict_2', 'entry_2'); +SELECT gen_dictionary('single_dict_1'); +SELECT gen_dictionary('single_dict_2'); + +--source include/rpl_sync.inc +--source include/rpl_connection_slave.inc + +SELECT * FROM mysql.masking_dictionaries; +SELECT gen_dictionary('single_dict_1'); +SELECT gen_dictionary('single_dict_2'); + +--source include/rpl_connection_master.inc +INSERT INTO mysql.masking_dictionaries VALUES ('single_dict_3', 'entry_3'); +SET DEBUG_SYNC='now WAIT_FOR masking_functions_cache_reload_done'; + +# Will fail to get data from single_dict_3 at this point if no dictionary flusher thread is running +SELECT gen_dictionary('single_dict_3'); + +--source include/rpl_sync.inc +--source include/rpl_connection_slave.inc + +SELECT * FROM mysql.masking_dictionaries; +SET DEBUG_SYNC='now WAIT_FOR masking_functions_cache_reload_done'; + +# Will fail to get data from single_dict_3 at this point if no dictionary flusher thread is running +SELECT gen_dictionary('single_dict_3'); + +# +# Cleanup 
+--disconnect con_priv + +--source include/rpl_connection_slave.inc +SET GLOBAL DEBUG='-d, masking_functions_signal_on_cache_reload'; +UNINSTALL COMPONENT 'file://component_masking_functions'; +--source include/rpl_connection_master.inc +SET GLOBAL DEBUG='-d, masking_functions_signal_on_cache_reload'; +UNINSTALL COMPONENT 'file://component_masking_functions'; + +DROP USER udftest_priv@localhost; +DROP TABLE mysql.masking_dictionaries; + +--source include/rpl_sync.inc +--source include/rpl_end.inc diff --git a/mysql-test/suite/component_masking_functions/t/sys_var_dictionaries_flush_interval_seconds_basic.test b/mysql-test/suite/component_masking_functions/t/sys_var_dictionaries_flush_interval_seconds_basic.test new file mode 100644 index 000000000000..cfb8a75a0359 --- /dev/null +++ b/mysql-test/suite/component_masking_functions/t/sys_var_dictionaries_flush_interval_seconds_basic.test @@ -0,0 +1,47 @@ +--source include/have_masking_functions_component.inc + +INSTALL COMPONENT 'file://component_masking_functions'; + +# No running flusher thread with default settings +SELECT @@global.masking_functions.dictionaries_flush_interval_seconds; +SELECT NAME FROM performance_schema.threads WHERE NAME LIKE "%masking_functions%"; + +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET GLOBAL masking_functions.dictionaries_flush_interval_seconds=100; + +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET SESSION masking_functions.dictionaries_flush_interval_seconds=100; + +--let $restart_parameters="restart: --masking-functions.dictionaries-flush-interval-seconds=100" +--source include/restart_mysqld.inc + +SELECT @@global.masking_functions.dictionaries_flush_interval_seconds; + +# Make sure dict flusher process is running +CREATE TABLE mysql.masking_dictionaries( + Dictionary VARCHAR(256) NOT NULL, + Term VARCHAR(256) NOT NULL, + UNIQUE INDEX dictionary_term_idx (Dictionary, Term) +) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4; + +CREATE USER udftest_priv@localhost; +GRANT MASKING_DICTIONARIES_ADMIN 
ON *.* TO udftest_priv@localhost; +--connect(con_priv,localhost,udftest_priv,,) + +SELECT masking_dictionary_term_add('single_dict_1', 'entry_1'); + +# Flusher thread is active +--connection default +SELECT NAME FROM performance_schema.threads WHERE NAME LIKE "%masking_functions%"; + +# +# Cleanup +--disconnect con_priv + +UNINSTALL COMPONENT 'file://component_masking_functions'; + +DROP USER udftest_priv@localhost; +DROP TABLE mysql.masking_dictionaries; + +--let $restart_parameters="restart:" +--source include/restart_mysqld.inc From d505232c771e8776328f7c76940f8b20a4b4df29 Mon Sep 17 00:00:00 2001 From: Yura Sorokin Date: Wed, 24 Apr 2024 15:39:53 +0200 Subject: [PATCH 4/8] PS-9148: Implemented hierarchical storage for dictionaries and terms https://perconadev.atlassian.net/browse/PS-9148 Introduced 'dictionary' and 'bookshelf' classes for storing terms on per-dictionary level. Reworked 'query_cache' to utilize these two new classes. --- components/masking_functions/CMakeLists.txt | 8 +- .../include/masking_functions/bookshelf.hpp | 60 ++++++++++++ .../masking_functions/bookshelf_fwd.hpp | 29 ++++++ .../include/masking_functions/dictionary.hpp | 54 +++++++++++ .../dictionary_container.hpp | 56 ----------- .../masking_functions/dictionary_fwd.hpp | 33 +++++++ .../include/masking_functions/query_cache.hpp | 11 +-- .../include/masking_functions/sql_context.hpp | 4 +- .../src/masking_functions/bookshelf.cpp | 96 +++++++++++++++++++ .../src/masking_functions/dictionary.cpp | 56 +++++++++++ .../dictionary_container.cpp | 91 ------------------ .../src/masking_functions/query_cache.cpp | 34 +++---- .../registration_routines.cpp | 5 +- .../src/masking_functions/sql_context.cpp | 8 +- 14 files changed, 361 insertions(+), 184 deletions(-) create mode 100644 components/masking_functions/include/masking_functions/bookshelf.hpp create mode 100644 components/masking_functions/include/masking_functions/bookshelf_fwd.hpp create mode 100644 
components/masking_functions/include/masking_functions/dictionary.hpp delete mode 100644 components/masking_functions/include/masking_functions/dictionary_container.hpp create mode 100644 components/masking_functions/include/masking_functions/dictionary_fwd.hpp create mode 100644 components/masking_functions/src/masking_functions/bookshelf.cpp create mode 100644 components/masking_functions/src/masking_functions/dictionary.cpp delete mode 100644 components/masking_functions/src/masking_functions/dictionary_container.cpp diff --git a/components/masking_functions/CMakeLists.txt b/components/masking_functions/CMakeLists.txt index 9f8ad21658b0..d0f20bcfbd54 100644 --- a/components/masking_functions/CMakeLists.txt +++ b/components/masking_functions/CMakeLists.txt @@ -26,9 +26,10 @@ endif() set(DATAMASKING_SOURCES src/component.cpp + src/masking_functions/bookshelf.cpp src/masking_functions/charset_string.cpp src/masking_functions/charset_string_operations.cpp - src/masking_functions/dictionary_container.cpp + src/masking_functions/dictionary.cpp src/masking_functions/query_builder.cpp src/masking_functions/query_cache.cpp src/masking_functions/random_string_generators.cpp @@ -37,12 +38,15 @@ set(DATAMASKING_SOURCES src/masking_functions/sql_escape_functions.cpp src/masking_functions/sys_vars.cpp + include/masking_functions/bookshelf_fwd.hpp + include/masking_functions/bookshelf.hpp include/masking_functions/charset_string_fwd.hpp include/masking_functions/charset_string.hpp include/masking_functions/charset_string_operations.hpp include/masking_functions/command_service_tuple_fwd.hpp include/masking_functions/command_service_tuple.hpp - include/masking_functions/dictionary_container.hpp + include/masking_functions/dictionary_fwd.hpp + include/masking_functions/dictionary.hpp include/masking_functions/primitive_singleton.hpp include/masking_functions/query_builder.hpp include/masking_functions/query_cache.hpp diff --git 
a/components/masking_functions/include/masking_functions/bookshelf.hpp b/components/masking_functions/include/masking_functions/bookshelf.hpp new file mode 100644 index 000000000000..25fb70abb939 --- /dev/null +++ b/components/masking_functions/include/masking_functions/bookshelf.hpp @@ -0,0 +1,60 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MASKING_FUNCTIONS_BOOKSHELF_HPP +#define MASKING_FUNCTIONS_BOOKSHELF_HPP + +#include "masking_functions/bookshelf_fwd.hpp" + +#include +#include +#include + +#include "masking_functions/dictionary_fwd.hpp" + +namespace masking_functions { + +class bookshelf { + public: + bookshelf() = default; + bookshelf(const dictionary &) = delete; + bookshelf(bookshelf &&) = delete; + bookshelf &operator=(const bookshelf &) = delete; + bookshelf &operator=(bookshelf &&) = delete; + + bool contains(const std::string &dictionary_name, + const std::string &term) const noexcept; + // returning a copy deliberately for thread safety + optional_string get_random(const std::string &dictionary_name) const noexcept; + bool remove(const std::string &dictionary_name) noexcept; + bool remove(const std::string &dictionary_name, + const std::string &term) noexcept; + bool insert(const std::string &dictionary_name, const std::string &term); + + private: + // TODO: in 
c++20 change the method signatures to accept std::string_view + // and container to std::unordered_map>. + using dictionary_container = std::unordered_map; + dictionary_container dictionaries_; + mutable std::shared_mutex dictionaries_mutex_; + + dictionary_ptr find_dictionary_internal( + const std::string &dictionary_name) const noexcept; +}; + +} // namespace masking_functions + +#endif // MASKING_FUNCTIONS_BOOKSHELF_HPP diff --git a/components/masking_functions/include/masking_functions/bookshelf_fwd.hpp b/components/masking_functions/include/masking_functions/bookshelf_fwd.hpp new file mode 100644 index 000000000000..6b7b869ce553 --- /dev/null +++ b/components/masking_functions/include/masking_functions/bookshelf_fwd.hpp @@ -0,0 +1,29 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MASKING_FUNCTIONS_BOOKSHELF_FWD_HPP +#define MASKING_FUNCTIONS_BOOKSHELF_FWD_HPP + +#include + +namespace masking_functions { + +class bookshelf; + +using bookshelf_ptr = std::shared_ptr; + +} // namespace masking_functions + +#endif // MASKING_FUNCTIONS_BOOKSHELF_FWD_HPP diff --git a/components/masking_functions/include/masking_functions/dictionary.hpp b/components/masking_functions/include/masking_functions/dictionary.hpp new file mode 100644 index 000000000000..1cbcac5027a8 --- /dev/null +++ b/components/masking_functions/include/masking_functions/dictionary.hpp @@ -0,0 +1,54 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MASKING_FUNCTIONS_DICTIONARY_HPP +#define MASKING_FUNCTIONS_DICTIONARY_HPP + +#include "masking_functions/dictionary_fwd.hpp" + +#include +#include +#include + +namespace masking_functions { + +class dictionary { + public: + // a convenience constructor that creates a dictionary with one term + explicit dictionary(const std::string &term); + + dictionary(const dictionary &) = delete; + dictionary(dictionary &&) = delete; + dictionary &operator=(const dictionary &) = delete; + dictionary &operator=(dictionary &&) = delete; + + bool contains(const std::string &term) const noexcept; + // returning a copy deliberately for thread safety + optional_string get_random() const; + bool insert(const std::string &term); + bool remove(const std::string &term) noexcept; + + private: + // TODO: in c++20 change the method signatures to accept std::string_view + // and container to std::unordered_set>. + using term_container = std::unordered_set; + term_container terms_; + mutable std::shared_mutex terms_mutex_; +}; + +} // namespace masking_functions + +#endif // MASKING_FUNCTIONS_DICTIONARY_HPP diff --git a/components/masking_functions/include/masking_functions/dictionary_container.hpp b/components/masking_functions/include/masking_functions/dictionary_container.hpp deleted file mode 100644 index 67db3673711c..000000000000 --- a/components/masking_functions/include/masking_functions/dictionary_container.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License.
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ - -#ifndef MASKING_FUNCTIONS_DICT_CONTAINER_HPP -#define MASKING_FUNCTIONS_DICT_CONTAINER_HPP - -#include -#include -#include -#include -#include -#include -#include - -namespace masking_functions { - -using optional_string = std::optional; - -class dictionary_container { - struct term_container { - explicit term_container(std::string term) : term_list{std::move(term)} {} - mutable std::shared_mutex term_mutex; - std::set term_list; - }; - - public: - bool contains(const std::string &dictionary_name, - const std::string &term) const noexcept; - optional_string get(const std::string &dictionary_name) const noexcept; - bool remove(const std::string &dictionary_name) noexcept; - bool remove(const std::string &dictionary_name, - const std::string &term) noexcept; - bool insert(const std::string &dictionary_name, - const std::string &term) noexcept; - - private: - std::map m_container; -}; - -using optional_dictionary_container = std::optional; - -} // namespace masking_functions - -#endif // MASKING_FUNCTIONS_DICT_CONTAINER_HPP diff --git a/components/masking_functions/include/masking_functions/dictionary_fwd.hpp b/components/masking_functions/include/masking_functions/dictionary_fwd.hpp new file mode 100644 index 000000000000..346781b55109 --- /dev/null +++ b/components/masking_functions/include/masking_functions/dictionary_fwd.hpp @@ -0,0 +1,33 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MASKING_FUNCTIONS_DICTIONARY_FWD_HPP +#define MASKING_FUNCTIONS_DICTIONARY_FWD_HPP + +#include +#include +#include + +namespace masking_functions { + +using optional_string = std::optional; + +class dictionary; + +using dictionary_ptr = std::shared_ptr; + +} // namespace masking_functions + +#endif // MASKING_FUNCTIONS_DICTIONARY_FWD_HPP diff --git a/components/masking_functions/include/masking_functions/query_cache.hpp b/components/masking_functions/include/masking_functions/query_cache.hpp index 809a6db8578b..2bbff0fb48e4 100644 --- a/components/masking_functions/include/masking_functions/query_cache.hpp +++ b/components/masking_functions/include/masking_functions/query_cache.hpp @@ -26,22 +26,22 @@ #include #include -#include "masking_functions/dictionary_container.hpp" +#include "masking_functions/bookshelf.hpp" namespace masking_functions { class query_cache { public: query_cache(); - query_cache(query_cache &other) = delete; + query_cache(const query_cache &other) = delete; query_cache(query_cache &&other) = delete; - query_cache &operator=(query_cache &other) = delete; + query_cache &operator=(const query_cache &other) = delete; query_cache &operator=(query_cache &&other) = delete; ~query_cache(); bool contains(const std::string &dictionary_name, const std::string &term) const; - optional_string get(const 
std::string &dictionary_name) const; + optional_string get_random(const std::string &dictionary_name) const; bool remove(const std::string &dictionary_name); bool remove(const std::string &dictionary_name, const std::string &term); bool insert(const std::string &dictionary_name, const std::string &term); @@ -52,8 +52,7 @@ class query_cache { void dict_flusher() noexcept; private: - mutable std::shared_mutex m_dict_mut; - dictionary_container m_dict_cache; + bookshelf_ptr m_dict_cache; ulonglong m_flusher_interval_seconds; std::atomic m_is_flusher_stopped; diff --git a/components/masking_functions/include/masking_functions/sql_context.hpp b/components/masking_functions/include/masking_functions/sql_context.hpp index d88b6de1bc16..37d2d3a67f9e 100644 --- a/components/masking_functions/include/masking_functions/sql_context.hpp +++ b/components/masking_functions/include/masking_functions/sql_context.hpp @@ -21,8 +21,8 @@ #include #include +#include "masking_functions/bookshelf_fwd.hpp" #include "masking_functions/command_service_tuple_fwd.hpp" -#include "masking_functions/dictionary_container.hpp" namespace masking_functions { @@ -46,7 +46,7 @@ class sql_context { return *impl_.get_deleter().services; } - optional_dictionary_container query_list(std::string_view query); + bookshelf_ptr query_list(std::string_view query); bool execute(std::string_view query); diff --git a/components/masking_functions/src/masking_functions/bookshelf.cpp b/components/masking_functions/src/masking_functions/bookshelf.cpp new file mode 100644 index 000000000000..6685bd34aea0 --- /dev/null +++ b/components/masking_functions/src/masking_functions/bookshelf.cpp @@ -0,0 +1,96 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#include "masking_functions/bookshelf.hpp" + +#include +#include +#include + +#include "masking_functions/dictionary.hpp" + +namespace masking_functions { + +bool bookshelf::contains(const std::string &dictionary_name, + const std::string &term) const noexcept { + const auto dict{find_dictionary_internal(dictionary_name)}; + if (!dict) { + return false; + } + return dict->contains(term); +} + +// returning a copy deliberately for thread safety +optional_string bookshelf::get_random( + const std::string &dictionary_name) const noexcept { + const auto dict{find_dictionary_internal(dictionary_name)}; + if (!dict) { + return std::nullopt; + } + return dict->get_random(); +} + +bool bookshelf::remove(const std::string &dictionary_name) noexcept { + std::unique_lock dictionaries_write_lock{dictionaries_mutex_}; + return dictionaries_.erase(dictionary_name) != 0U; +} + +bool bookshelf::remove(const std::string &dictionary_name, + const std::string &term) noexcept { + const auto dict{find_dictionary_internal(dictionary_name)}; + if (!dict) { + return false; + } + return dict->remove(term); + // after this operation we may have a dictionary with no terms in it - + // it is fine and much safer than trying to re-acquire a write lock and + // removing the dictionary from the bookshelf when it has 0 terms. 
+} + +bool bookshelf::insert(const std::string &dictionary_name, + const std::string &term) { + auto dict{find_dictionary_internal(dictionary_name)}; + if (dict) { + return dict->insert(term); + } + + // if no dictionary with such name already exists, we need to + // create it under a write lock + { + std::unique_lock dictionaries_write_lock{dictionaries_mutex_}; + // it may happen that between the read and write locks another thread + // already created the dictionary with the same name - checking again + dict = std::make_shared(term); + const auto [dictionary_it, + inserted]{dictionaries_.emplace(dictionary_name, dict)}; + if (inserted) { + return true; + } + dict = dictionary_it->second; + } + return dict->insert(term); +} + +dictionary_ptr bookshelf::find_dictionary_internal( + const std::string &dictionary_name) const noexcept { + std::shared_lock dictionaries_read_lock{dictionaries_mutex_}; + const auto dictionary_it{dictionaries_.find(dictionary_name)}; + if (dictionary_it == std::cend(dictionaries_)) { + return {}; + } + return dictionary_it->second; +} + +} // namespace masking_functions diff --git a/components/masking_functions/src/masking_functions/dictionary.cpp b/components/masking_functions/src/masking_functions/dictionary.cpp new file mode 100644 index 000000000000..9d7ddc90bc2c --- /dev/null +++ b/components/masking_functions/src/masking_functions/dictionary.cpp @@ -0,0 +1,56 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#include "masking_functions/dictionary.hpp" + +#include "masking_functions/random_string_generators.hpp" + +#include +#include +#include + +namespace masking_functions { + +dictionary::dictionary(const std::string &term) : terms_{}, terms_mutex_{} { + terms_.emplace(term); +} + +bool dictionary::contains(const std::string &term) const noexcept { + std::shared_lock terms_read_lock{terms_mutex_}; + return terms_.count(term) > 0U; +} + +optional_string dictionary::get_random() const { + std::shared_lock terms_read_lock{terms_mutex_}; + + if (terms_.empty()) { + return std::nullopt; + } + + const auto random_index{random_number(0, terms_.size() - 1U)}; + return *std::next(terms_.begin(), random_index); +} + +bool dictionary::insert(const std::string &term) { + std::unique_lock terms_write_lock{terms_mutex_}; + return terms_.emplace(term).second; +} + +bool dictionary::remove(const std::string &term) noexcept { + std::unique_lock terms_write_lock{terms_mutex_}; + return terms_.erase(term) > 0U; +} + +} // namespace masking_functions diff --git a/components/masking_functions/src/masking_functions/dictionary_container.cpp b/components/masking_functions/src/masking_functions/dictionary_container.cpp deleted file mode 100644 index dd0f4e33fc22..000000000000 --- a/components/masking_functions/src/masking_functions/dictionary_container.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ - -#include "masking_functions/dictionary_container.hpp" - -#include "masking_functions/random_string_generators.hpp" - -#include - -namespace masking_functions { - -bool dictionary_container::contains(const std::string &dictionary_name, - const std::string &term) const noexcept { - const auto it = m_container.find(dictionary_name); - - if (it == m_container.cend()) { - return false; - } - - if (term.length() == 0) { - return true; - } - - std::shared_lock term_read_lock{it->second.term_mutex}; - return it->second.term_list.count(term) > 0; -} - -optional_string dictionary_container::get( - const std::string &dictionary_name) const noexcept { - const auto dict_it = m_container.find(dictionary_name); - - if (dict_it == m_container.cend()) { - return std::nullopt; - } - - std::shared_lock term_read_lock{dict_it->second.term_mutex}; - - if (dict_it->second.term_list.empty()) { - return std::nullopt; - } - - auto random_step = random_number(0, dict_it->second.term_list.size() - 1); - auto term_it = dict_it->second.term_list.begin(); - std::advance(term_it, random_step); - - return optional_string{std::in_place, *term_it}; -} - -bool dictionary_container::remove(const std::string &dictionary_name) noexcept { - return m_container.erase(dictionary_name) > 0; -} - -bool dictionary_container::remove(const std::string &dictionary_name, - const std::string &term) noexcept { - const auto dict_it = m_container.find(dictionary_name); - - if (dict_it == m_container.cend()) { - return false; - } - - std::unique_lock 
term_write_lock{dict_it->second.term_mutex}; - return dict_it->second.term_list.erase(term) > 0; -} - -bool dictionary_container::insert(const std::string &dictionary_name, - const std::string &term) noexcept { - auto it = m_container.find(dictionary_name); - - if (it != m_container.end()) { - std::unique_lock term_write_lock{it->second.term_mutex}; - it->second.term_list.emplace(term); - } else { - m_container.emplace(dictionary_name, term); - } - - return true; -} - -} // namespace masking_functions diff --git a/components/masking_functions/src/masking_functions/query_cache.cpp b/components/masking_functions/src/masking_functions/query_cache.cpp index a19219e9201e..386f372b4811 100644 --- a/components/masking_functions/src/masking_functions/query_cache.cpp +++ b/components/masking_functions/src/masking_functions/query_cache.cpp @@ -136,33 +136,29 @@ void query_cache::dict_flusher() noexcept { } bool query_cache::load_cache() { + masking_functions::sql_context sql_ctx{global_command_services::instance()}; auto query = global_query_builder::instance().select_all_from_dictionary(); - auto result = - masking_functions::sql_context{global_command_services::instance()} - .query_list(query); + auto result = sql_ctx.query_list(query); - if (result.has_value()) { - std::unique_lock dict_write_lock{m_dict_mut}; - m_dict_cache = std::move(result.value()); + if (result) { + // TODO: in c++20 change m_dict_cache to std::atomic + std::atomic_store(&m_dict_cache, result); } - return result.has_value(); + return static_cast(result); } bool query_cache::contains(const std::string &dictionary_name, const std::string &term) const { - std::shared_lock dict_read_lock{m_dict_mut}; - return m_dict_cache.contains(dictionary_name, term); + return m_dict_cache->contains(dictionary_name, term); } -optional_string query_cache::get(const std::string &dictionary_name) const { - std::shared_lock dict_read_lock{m_dict_mut}; - return m_dict_cache.get(dictionary_name); +optional_string 
query_cache::get_random( + const std::string &dictionary_name) const { + return m_dict_cache->get_random(dictionary_name); } bool query_cache::remove(const std::string &dictionary_name) { - std::unique_lock dict_write_lock{m_dict_mut}; - masking_functions::sql_context sql_ctx{global_command_services::instance()}; auto query = global_query_builder::instance().delete_for_dictionary(dictionary_name); @@ -171,13 +167,11 @@ bool query_cache::remove(const std::string &dictionary_name) { return false; } - return m_dict_cache.remove(dictionary_name); + return m_dict_cache->remove(dictionary_name); } bool query_cache::remove(const std::string &dictionary_name, const std::string &term) { - std::shared_lock dict_read_lock{m_dict_mut}; - masking_functions::sql_context sql_ctx{global_command_services::instance()}; auto query = global_query_builder::instance().delete_for_dictionary_and_term( dictionary_name, term); @@ -186,13 +180,11 @@ bool query_cache::remove(const std::string &dictionary_name, return false; } - return m_dict_cache.remove(dictionary_name, term); + return m_dict_cache->remove(dictionary_name, term); } bool query_cache::insert(const std::string &dictionary_name, const std::string &term) { - std::unique_lock dict_write_lock{m_dict_mut}; - masking_functions::sql_context sql_ctx{global_command_services::instance()}; auto query = global_query_builder::instance().insert_ignore_record( dictionary_name, term); @@ -201,7 +193,7 @@ bool query_cache::insert(const std::string &dictionary_name, return false; } - return m_dict_cache.insert(dictionary_name, term); + return m_dict_cache->insert(dictionary_name, term); } } // namespace masking_functions diff --git a/components/masking_functions/src/masking_functions/registration_routines.cpp b/components/masking_functions/src/masking_functions/registration_routines.cpp index af4a071f786b..c85d46e2d864 100644 --- a/components/masking_functions/src/masking_functions/registration_routines.cpp +++ 
b/components/masking_functions/src/masking_functions/registration_routines.cpp @@ -973,7 +973,7 @@ class gen_blocklist_impl { } } - auto sresult = global_query_cache::instance().get(cs_dict_b_escaped); + auto sresult = global_query_cache::instance().get_random(cs_dict_b_escaped); if (sresult && !sresult->empty()) { masking_functions::charset_string utf8_result{ @@ -1018,7 +1018,8 @@ class gen_dictionary_impl { const mysqlpp::udf_context &ctx) { const auto cs_dictionary_escaped = escape_string(make_charset_string_from_arg(ctx, 0)); - auto sresult = global_query_cache::instance().get(cs_dictionary_escaped); + auto sresult = + global_query_cache::instance().get_random(cs_dictionary_escaped); if (sresult && !sresult->empty()) { return *sresult; diff --git a/components/masking_functions/src/masking_functions/sql_context.cpp b/components/masking_functions/src/masking_functions/sql_context.cpp index 8266afb91956..5e0d4a758d54 100644 --- a/components/masking_functions/src/masking_functions/sql_context.cpp +++ b/components/masking_functions/src/masking_functions/sql_context.cpp @@ -19,9 +19,9 @@ #include #include -#include "masking_functions/sql_context.hpp" - +#include "masking_functions/bookshelf.hpp" #include "masking_functions/command_service_tuple.hpp" +#include "masking_functions/sql_context.hpp" namespace { @@ -82,7 +82,7 @@ sql_context::sql_context(const command_service_tuple &services) } } -optional_dictionary_container sql_context::query_list(std::string_view query) { +bookshelf_ptr sql_context::query_list(std::string_view query) { if ((*get_services().query->query)(to_mysql_h(impl_.get()), query.data(), query.length()) != 0) { throw std::runtime_error{"Error while executing SQL query"}; @@ -115,7 +115,7 @@ optional_dictionary_container sql_context::query_list(std::string_view query) { &row_count) != 0) throw std::runtime_error{"Couldn't query row count"}; - optional_dictionary_container result{std::in_place, dictionary_container{}}; + bookshelf_ptr 
result{std::make_shared()}; for (auto i = row_count; i > 0; --i) { MYSQL_ROW_H row = nullptr; From bc4179bb5d3fb496a24e74c8bbf944dbb6e2383e Mon Sep 17 00:00:00 2001 From: Yura Sorokin Date: Wed, 24 Apr 2024 18:55:53 +0200 Subject: [PATCH 5/8] PS-9148: Minor refactoring to break dependencies https://perconadev.atlassian.net/browse/PS-9148 Introduced 'component_sys_variable_service_tuple' class for grouping component system variable registration services (supposed to be used with 'primitive_singleton' class template). 'query_cache' now expects 'query_builder' and 'flusher_interval_seconds' as its constructor's parameters. Eliminated custom MySQL types (like 'ulonglong') and its includes (like 'my_inttypes.h') from the publicly facing headers. 'query_cache' is now explicitly initialized / deinitialized in the component's 'init()' / 'deinit()' functions via 'primitive_singleton' interface. 'query_cache' helper thread-related methods made private. --- components/masking_functions/CMakeLists.txt | 4 + .../component_sys_variable_service_tuple.hpp | 47 ++++++ ...mponent_sys_variable_service_tuple_fwd.hpp | 25 +++ .../masking_functions/query_builder.hpp | 4 +- .../masking_functions/query_builder_fwd.hpp | 29 ++++ .../include/masking_functions/query_cache.hpp | 25 ++- .../masking_functions/query_cache_fwd.hpp | 29 ++++ .../include/masking_functions/sys_vars.hpp | 12 +- .../masking_functions/src/component.cpp | 40 ++++- .../src/masking_functions/query_cache.cpp | 152 +++++++++--------- .../registration_routines.cpp | 23 +-- .../src/masking_functions/sys_vars.cpp | 43 ++--- 12 files changed, 305 insertions(+), 128 deletions(-) create mode 100644 components/masking_functions/include/masking_functions/component_sys_variable_service_tuple.hpp create mode 100644 components/masking_functions/include/masking_functions/component_sys_variable_service_tuple_fwd.hpp create mode 100644 components/masking_functions/include/masking_functions/query_builder_fwd.hpp create mode 100644 
components/masking_functions/include/masking_functions/query_cache_fwd.hpp diff --git a/components/masking_functions/CMakeLists.txt b/components/masking_functions/CMakeLists.txt index d0f20bcfbd54..47bc8edf0e0d 100644 --- a/components/masking_functions/CMakeLists.txt +++ b/components/masking_functions/CMakeLists.txt @@ -45,10 +45,14 @@ set(DATAMASKING_SOURCES include/masking_functions/charset_string_operations.hpp include/masking_functions/command_service_tuple_fwd.hpp include/masking_functions/command_service_tuple.hpp + include/masking_functions/component_sys_variable_service_tuple_fwd.hpp + include/masking_functions/component_sys_variable_service_tuple.hpp include/masking_functions/dictionary_fwd.hpp include/masking_functions/dictionary.hpp include/masking_functions/primitive_singleton.hpp + include/masking_functions/query_builder_fwd.hpp include/masking_functions/query_builder.hpp + include/masking_functions/query_cache_fwd.hpp include/masking_functions/query_cache.hpp include/masking_functions/random_string_generators.hpp include/masking_functions/registration_routines.hpp diff --git a/components/masking_functions/include/masking_functions/component_sys_variable_service_tuple.hpp b/components/masking_functions/include/masking_functions/component_sys_variable_service_tuple.hpp new file mode 100644 index 000000000000..20f46e2c3711 --- /dev/null +++ b/components/masking_functions/include/masking_functions/component_sys_variable_service_tuple.hpp @@ -0,0 +1,47 @@ +/* Copyright (c) 2023 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MASKING_FUNCTIONS_COMPONENT_SYS_VARIABLE_SERVICE_TUPLE_HPP +#define MASKING_FUNCTIONS_COMPONENT_SYS_VARIABLE_SERVICE_TUPLE_HPP + +#include + +#include + +#include "masking_functions/component_sys_variable_service_tuple_fwd.hpp" + +namespace masking_functions { + +// A set of MySQL query services required to perform system variable +// registration / unregistration. +// It is recommended to be used in a combination with the +// 'primitive_singleton' class template. +// +// primitive_singleton::instance() = +// component_sys_variable_service_tuple{ +// component_sys_variable_register, +// component_sys_variable_unregister +// }; +// ... +// sql_context +// ctx{primitive_singleton::instance()}; +struct component_sys_variable_service_tuple { + SERVICE_TYPE(component_sys_variable_register) * registrator; + SERVICE_TYPE(component_sys_variable_unregister) * unregistrator; +}; + +} // namespace masking_functions + +#endif // MASKING_FUNCTIONS_COMPONENT_SYS_VARIABLE_SERVICE_TUPLE_HPP diff --git a/components/masking_functions/include/masking_functions/component_sys_variable_service_tuple_fwd.hpp b/components/masking_functions/include/masking_functions/component_sys_variable_service_tuple_fwd.hpp new file mode 100644 index 000000000000..4bf98031e9d8 --- /dev/null +++ b/components/masking_functions/include/masking_functions/component_sys_variable_service_tuple_fwd.hpp @@ -0,0 +1,25 @@ +/* Copyright (c) 2023 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MASKING_FUNCTIONS_COMPONENT_SYS_VARIABLE_SERVICE_TUPLE_FWD_HPP +#define MASKING_FUNCTIONS_COMPONENT_SYS_VARIABLE_SERVICE_TUPLE_FWD_HPP + +namespace masking_functions { + +struct component_sys_variable_service_tuple; + +} // namespace masking_functions + +#endif // MASKING_FUNCTIONS_COMPONENT_SYS_VARIABLE_SERVICE_TUPLE_FWD_HPP diff --git a/components/masking_functions/include/masking_functions/query_builder.hpp b/components/masking_functions/include/masking_functions/query_builder.hpp index 6d6629914597..1ed22abaaabd 100644 --- a/components/masking_functions/include/masking_functions/query_builder.hpp +++ b/components/masking_functions/include/masking_functions/query_builder.hpp @@ -16,7 +16,7 @@ #ifndef MASKING_FUNCTIONS_QUERY_BUILDER_HPP #define MASKING_FUNCTIONS_QUERY_BUILDER_HPP -#include "masking_functions/sys_vars.hpp" +#include "masking_functions/query_builder_fwd.hpp" #include #include @@ -35,7 +35,7 @@ class query_builder { static constexpr std::string_view default_term_field_name = "Term"; explicit query_builder( - std::string_view database_name = sys_vars::get_dict_database_name(), + std::string_view database_name, std::string_view table_name = default_table_name, std::string_view dictionary_field_name = default_dictionary_field_name, std::string_view term_field_name = default_term_field_name) diff --git a/components/masking_functions/include/masking_functions/query_builder_fwd.hpp b/components/masking_functions/include/masking_functions/query_builder_fwd.hpp new file mode 100644 index 
000000000000..4912dfd080d0 --- /dev/null +++ b/components/masking_functions/include/masking_functions/query_builder_fwd.hpp @@ -0,0 +1,29 @@ +/* Copyright (c) 2023 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MASKING_FUNCTIONS_QUERY_BUILDER_FWD_HPP +#define MASKING_FUNCTIONS_QUERY_BUILDER_FWD_HPP + +#include + +namespace masking_functions { + +class query_builder; + +using query_builder_ptr = std::unique_ptr; + +} // namespace masking_functions + +#endif // MASKING_FUNCTIONS_QUERY_BUILDER_FWD_HPP diff --git a/components/masking_functions/include/masking_functions/query_cache.hpp b/components/masking_functions/include/masking_functions/query_cache.hpp index 2bbff0fb48e4..d39777c8742b 100644 --- a/components/masking_functions/include/masking_functions/query_cache.hpp +++ b/components/masking_functions/include/masking_functions/query_cache.hpp @@ -16,6 +16,8 @@ #ifndef MASKING_FUNCTIONS_QUERY_CACHE_HPP #define MASKING_FUNCTIONS_QUERY_CACHE_HPP +#include "masking_functions/query_cache_fwd.hpp" + #include #include #include @@ -23,16 +25,18 @@ #include #include -#include #include -#include "masking_functions/bookshelf.hpp" +#include "masking_functions/bookshelf_fwd.hpp" +#include "masking_functions/dictionary_fwd.hpp" +#include "masking_functions/query_builder_fwd.hpp" namespace masking_functions { class 
query_cache { public: - query_cache(); + query_cache(query_builder_ptr query_builder, + std::uint64_t flusher_interval_seconds); query_cache(const query_cache &other) = delete; query_cache(query_cache &&other) = delete; query_cache &operator=(const query_cache &other) = delete; @@ -45,16 +49,15 @@ class query_cache { bool remove(const std::string &dictionary_name); bool remove(const std::string &dictionary_name, const std::string &term); bool insert(const std::string &dictionary_name, const std::string &term); - bool load_cache(); - void init_thd() noexcept; - void release_thd() noexcept; - void dict_flusher() noexcept; + bool load_cache(); private: bookshelf_ptr m_dict_cache; - ulonglong m_flusher_interval_seconds; + query_builder_ptr m_query_builder; + + std::uint64_t m_flusher_interval_seconds; std::atomic m_is_flusher_stopped; std::mutex m_flusher_mutex; std::condition_variable m_flusher_condition_var; @@ -63,6 +66,12 @@ class query_cache { my_thread_handle m_flusher_thread; my_thread_attr_t m_flusher_thread_attr; std::unique_ptr m_flusher_thd; + + void init_thd() noexcept; + void release_thd() noexcept; + void dict_flusher() noexcept; + + static void *run_dict_flusher(void *arg); }; } // namespace masking_functions diff --git a/components/masking_functions/include/masking_functions/query_cache_fwd.hpp b/components/masking_functions/include/masking_functions/query_cache_fwd.hpp new file mode 100644 index 000000000000..f807b8013476 --- /dev/null +++ b/components/masking_functions/include/masking_functions/query_cache_fwd.hpp @@ -0,0 +1,29 @@ +/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ + +#ifndef MASKING_FUNCTIONS_QUERY_CACHE_FWD_HPP +#define MASKING_FUNCTIONS_QUERY_CACHE_FWD_HPP + +#include + +namespace masking_functions { + +class query_cache; + +using query_cache_ptr = std::unique_ptr; + +} // namespace masking_functions + +#endif // MASKING_FUNCTIONS_QUERY_CACHE_FWD_HPP diff --git a/components/masking_functions/include/masking_functions/sys_vars.hpp b/components/masking_functions/include/masking_functions/sys_vars.hpp index 599773df3816..767eccf3a9d4 100644 --- a/components/masking_functions/include/masking_functions/sys_vars.hpp +++ b/components/masking_functions/include/masking_functions/sys_vars.hpp @@ -16,19 +16,19 @@ #ifndef MASKING_FUNCTIONS_SYS_VARS_HPP #define MASKING_FUNCTIONS_SYS_VARS_HPP -#include "my_inttypes.h" - +#include +#include #include -namespace masking_functions::sys_vars { +namespace masking_functions { std::string_view get_dict_database_name() noexcept; -ulonglong get_flush_interval_seconds() noexcept; +std::uint64_t get_flush_interval_seconds() noexcept; bool register_sys_vars(); bool unregister_sys_vars(); -bool validate(); +bool check_sys_vars(std::string &error_message); -} // namespace masking_functions::sys_vars +} // namespace masking_functions #endif // MASKING_FUNCTIONS_SYS_VARS_HPP diff --git a/components/masking_functions/src/component.cpp b/components/masking_functions/src/component.cpp index d83b85de5202..6b1a0508df4f 100644 --- a/components/masking_functions/src/component.cpp +++ b/components/masking_functions/src/component.cpp @@ -35,7 +35,10 @@ #include #include 
"masking_functions/command_service_tuple.hpp" +#include "masking_functions/component_sys_variable_service_tuple.hpp" #include "masking_functions/primitive_singleton.hpp" +#include "masking_functions/query_builder.hpp" +#include "masking_functions/query_cache.hpp" #include "masking_functions/registration_routines.hpp" #include "masking_functions/string_service_tuple.hpp" #include "masking_functions/sys_vars.hpp" @@ -117,6 +120,12 @@ static mysql_service_status_t component_init() { mysql_service_mysql_command_query_result, mysql_service_mysql_command_options, mysql_service_mysql_command_factory}; + masking_functions::primitive_singleton< + masking_functions::component_sys_variable_service_tuple>::instance() = + masking_functions::component_sys_variable_service_tuple{ + // TODO: convert this to designated initializers in c++20 + mysql_service_component_sys_variable_register, + mysql_service_component_sys_variable_unregister}; // here we use a custom error reporting function // 'masking_functions_my_error()' based on the @@ -132,20 +141,34 @@ static mysql_service_status_t component_init() { return 1; } - if (!masking_functions::sys_vars::register_sys_vars() || - !masking_functions::sys_vars::validate()) { + if (!masking_functions::register_sys_vars()) { LogComponentErr(ERROR_LEVEL, ER_LOG_PRINTF_MSG, "Cannot register system variables"); component_deinit(); return 1; } + std::string check_error_message; + if (!masking_functions::check_sys_vars(check_error_message)) { + LogComponentErr(ERROR_LEVEL, ER_LOG_PRINTF_MSG, + check_error_message.c_str()); + component_deinit(); + return 1; + } + if (!masking_functions::register_udfs()) { LogComponentErr(ERROR_LEVEL, ER_LOG_PRINTF_MSG, "Cannot register UDFs"); component_deinit(); return 1; } + auto builder{std::make_unique( + masking_functions::get_dict_database_name())}; + masking_functions::primitive_singleton< + masking_functions::query_cache_ptr>::instance() = + std::make_unique( + std::move(builder), 
masking_functions::get_flush_interval_seconds()); + LogComponentErr(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "Component successfully initialized"); return 0; @@ -153,20 +176,25 @@ static mysql_service_status_t component_init() { static mysql_service_status_t component_deinit() { int result = 0; + + masking_functions::primitive_singleton< + masking_functions::query_cache_ptr>::instance() + .reset(); + if (!masking_functions::unregister_udfs()) { LogComponentErr(ERROR_LEVEL, ER_LOG_PRINTF_MSG, "Cannot unregister UDFs"); result = 1; } - if (!masking_functions::unregister_dynamic_privileges()) { + if (!masking_functions::unregister_sys_vars()) { LogComponentErr(ERROR_LEVEL, ER_LOG_PRINTF_MSG, - "Cannot unregister dynamic privilege"); + "Cannot unregister system variables"); result = 1; } - if (!masking_functions::sys_vars::unregister_sys_vars()) { + if (!masking_functions::unregister_dynamic_privileges()) { LogComponentErr(ERROR_LEVEL, ER_LOG_PRINTF_MSG, - "Cannot unregister system variables"); + "Cannot unregister dynamic privilege"); result = 1; } diff --git a/components/masking_functions/src/masking_functions/query_cache.cpp b/components/masking_functions/src/masking_functions/query_cache.cpp index 386f372b4811..93901ae4b9df 100644 --- a/components/masking_functions/src/masking_functions/query_cache.cpp +++ b/components/masking_functions/src/masking_functions/query_cache.cpp @@ -15,11 +15,8 @@ #include "masking_functions/query_cache.hpp" -#include "masking_functions/command_service_tuple.hpp" -#include "masking_functions/primitive_singleton.hpp" -#include "masking_functions/query_builder.hpp" -#include "masking_functions/sql_context.hpp" -#include "masking_functions/sys_vars.hpp" +#include +#include #include #include @@ -27,36 +24,33 @@ #include #include -#include -#include +#include "masking_functions/bookshelf.hpp" +#include "masking_functions/command_service_tuple.hpp" +#include "masking_functions/primitive_singleton.hpp" +#include 
"masking_functions/query_builder.hpp" +#include "masking_functions/sql_context.hpp" +#include "masking_functions/sys_vars.hpp" extern REQUIRES_SERVICE_PLACEHOLDER(log_builtins); -namespace masking_functions { namespace { +using global_command_services = masking_functions::primitive_singleton< + masking_functions::command_service_tuple>; + constexpr std::string_view psi_category_name{"masking_functions"}; constexpr std::string_view flusher_thd_psi_name{ "masking_functions_dict_flusher"}; constexpr std::string_view flusher_thd_psi_os_name{"mf_flusher"}; -using global_command_services = masking_functions::primitive_singleton< - masking_functions::command_service_tuple>; -using global_query_builder = - masking_functions::primitive_singleton; - -void *run_dict_flusher(void *arg) { - auto *self = reinterpret_cast(arg); - self->init_thd(); - self->dict_flusher(); - self->release_thd(); - return nullptr; -} +} // anonymous namespace -} // namespace +namespace masking_functions { -query_cache::query_cache() - : m_flusher_interval_seconds{sys_vars::get_flush_interval_seconds()}, +query_cache::query_cache(query_builder_ptr query_builder, + std::uint64_t flusher_interval_seconds) + : m_query_builder{std::move(query_builder)}, + m_flusher_interval_seconds{flusher_interval_seconds}, m_is_flusher_stopped{true} { load_cache(); @@ -89,55 +83,9 @@ query_cache::~query_cache() { } } -void query_cache::init_thd() noexcept { - auto *thd = new THD; - my_thread_init(); - thd->set_new_thread_id(); - thd->thread_stack = reinterpret_cast(&thd); - thd->store_globals(); - m_flusher_thd.reset(thd); -} - -void query_cache::release_thd() noexcept { my_thread_end(); } - -void query_cache::dict_flusher() noexcept { -#ifdef HAVE_PSI_THREAD_INTERFACE - { - struct PSI_thread *psi = m_flusher_thd->get_psi(); - PSI_THREAD_CALL(set_thread_id)(psi, m_flusher_thd->thread_id()); - PSI_THREAD_CALL(set_thread_THD)(psi, m_flusher_thd.get()); - PSI_THREAD_CALL(set_thread_command)(m_flusher_thd->get_command()); 
- PSI_THREAD_CALL(set_thread_info) - (STRING_WITH_LEN("Masking functions component cache flusher")); - } -#endif - - while (!m_is_flusher_stopped) { - std::unique_lock lock{m_flusher_mutex}; - const auto wait_started_at = std::chrono::system_clock::now(); - m_flusher_condition_var.wait_for( - lock, std::chrono::seconds{m_flusher_interval_seconds}, - [this, wait_started_at] { - return std::chrono::duration_cast( - std::chrono::system_clock::now() - wait_started_at) >= - std::chrono::seconds{m_flusher_interval_seconds} || - m_is_flusher_stopped.load(); - }); - - if (!m_is_flusher_stopped) { - load_cache(); - - DBUG_EXECUTE_IF("masking_functions_signal_on_cache_reload", { - const char act[] = "now SIGNAL masking_functions_cache_reload_done"; - assert(!debug_sync_set_action(current_thd, STRING_WITH_LEN(act))); - };); - } - } -} - bool query_cache::load_cache() { masking_functions::sql_context sql_ctx{global_command_services::instance()}; - auto query = global_query_builder::instance().select_all_from_dictionary(); + auto query = m_query_builder->select_all_from_dictionary(); auto result = sql_ctx.query_list(query); if (result) { @@ -160,8 +108,7 @@ optional_string query_cache::get_random( bool query_cache::remove(const std::string &dictionary_name) { masking_functions::sql_context sql_ctx{global_command_services::instance()}; - auto query = - global_query_builder::instance().delete_for_dictionary(dictionary_name); + auto query = m_query_builder->delete_for_dictionary(dictionary_name); if (!sql_ctx.execute(query)) { return false; @@ -173,8 +120,8 @@ bool query_cache::remove(const std::string &dictionary_name) { bool query_cache::remove(const std::string &dictionary_name, const std::string &term) { masking_functions::sql_context sql_ctx{global_command_services::instance()}; - auto query = global_query_builder::instance().delete_for_dictionary_and_term( - dictionary_name, term); + auto query = + m_query_builder->delete_for_dictionary_and_term(dictionary_name, term); if 
(!sql_ctx.execute(query)) { return false; @@ -186,8 +133,7 @@ bool query_cache::remove(const std::string &dictionary_name, bool query_cache::insert(const std::string &dictionary_name, const std::string &term) { masking_functions::sql_context sql_ctx{global_command_services::instance()}; - auto query = global_query_builder::instance().insert_ignore_record( - dictionary_name, term); + auto query = m_query_builder->insert_ignore_record(dictionary_name, term); if (!sql_ctx.execute(query)) { return false; @@ -196,4 +142,58 @@ bool query_cache::insert(const std::string &dictionary_name, return m_dict_cache->insert(dictionary_name, term); } +void query_cache::init_thd() noexcept { + auto *thd = new THD; + my_thread_init(); + thd->set_new_thread_id(); + thd->thread_stack = reinterpret_cast(&thd); + thd->store_globals(); + m_flusher_thd.reset(thd); +} + +void query_cache::release_thd() noexcept { my_thread_end(); } + +void query_cache::dict_flusher() noexcept { +#ifdef HAVE_PSI_THREAD_INTERFACE + { + struct PSI_thread *psi = m_flusher_thd->get_psi(); + PSI_THREAD_CALL(set_thread_id)(psi, m_flusher_thd->thread_id()); + PSI_THREAD_CALL(set_thread_THD)(psi, m_flusher_thd.get()); + PSI_THREAD_CALL(set_thread_command)(m_flusher_thd->get_command()); + PSI_THREAD_CALL(set_thread_info) + (STRING_WITH_LEN("Masking functions component cache flusher")); + } +#endif + + while (!m_is_flusher_stopped) { + std::unique_lock lock{m_flusher_mutex}; + const auto wait_started_at = std::chrono::system_clock::now(); + m_flusher_condition_var.wait_for( + lock, std::chrono::seconds{m_flusher_interval_seconds}, + [this, wait_started_at] { + return std::chrono::duration_cast( + std::chrono::system_clock::now() - wait_started_at) >= + std::chrono::seconds{m_flusher_interval_seconds} || + m_is_flusher_stopped.load(); + }); + + if (!m_is_flusher_stopped) { + load_cache(); + + DBUG_EXECUTE_IF("masking_functions_signal_on_cache_reload", { + const char act[] = "now SIGNAL 
masking_functions_cache_reload_done"; + assert(!debug_sync_set_action(current_thd, STRING_WITH_LEN(act))); + };); + } + } +} + +void *query_cache::run_dict_flusher(void *arg) { + auto *self = reinterpret_cast(arg); + self->init_thd(); + self->dict_flusher(); + self->release_thd(); + return nullptr; +} + } // namespace masking_functions diff --git a/components/masking_functions/src/masking_functions/registration_routines.cpp b/components/masking_functions/src/masking_functions/registration_routines.cpp index c85d46e2d864..a9ba416d404d 100644 --- a/components/masking_functions/src/masking_functions/registration_routines.cpp +++ b/components/masking_functions/src/masking_functions/registration_routines.cpp @@ -56,7 +56,7 @@ namespace { using global_string_services = masking_functions::primitive_singleton< masking_functions::string_service_tuple>; using global_query_cache = - masking_functions::primitive_singleton; + masking_functions::primitive_singleton; constexpr std::string_view masking_dictionaries_privilege_name = "MASKING_DICTIONARIES_ADMIN"; @@ -965,15 +965,16 @@ class gen_blocklist_impl { escape_string(make_charset_string_from_arg(ctx, 2)); { - auto sresult = global_query_cache::instance().contains(cs_dict_a_escaped, - cs_term_escaped); + auto sresult = global_query_cache::instance()->contains(cs_dict_a_escaped, + cs_term_escaped); if (!sresult) { return cs_term_escaped; } } - auto sresult = global_query_cache::instance().get_random(cs_dict_b_escaped); + auto sresult = + global_query_cache::instance()->get_random(cs_dict_b_escaped); if (sresult && !sresult->empty()) { masking_functions::charset_string utf8_result{ @@ -1019,7 +1020,7 @@ class gen_dictionary_impl { const auto cs_dictionary_escaped = escape_string(make_charset_string_from_arg(ctx, 0)); auto sresult = - global_query_cache::instance().get_random(cs_dictionary_escaped); + global_query_cache::instance()->get_random(cs_dictionary_escaped); if (sresult && !sresult->empty()) { return *sresult; @@ 
-1060,7 +1061,7 @@ class masking_dictionaries_flush_impl { mysqlpp::udf_result_t calculate(const mysqlpp::udf_context &ctx [[maybe_unused]]) { - if (!global_query_cache::instance().load_cache()) { + if (!global_query_cache::instance()->load_cache()) { return std::nullopt; } @@ -1107,7 +1108,7 @@ class masking_dictionary_remove_impl { const auto cs_dictionary_escaped = escape_string(make_charset_string_from_arg(ctx, 0)); - if (!global_query_cache::instance().remove(cs_dictionary_escaped)) { + if (!global_query_cache::instance()->remove(cs_dictionary_escaped)) { return std::nullopt; } @@ -1161,8 +1162,8 @@ class masking_dictionary_term_add_impl { const auto cs_term_escaped = escape_string(make_charset_string_from_arg(ctx, 1)); - if (!global_query_cache::instance().insert(cs_dictionary_escaped, - cs_term_escaped)) { + if (!global_query_cache::instance()->insert(cs_dictionary_escaped, + cs_term_escaped)) { return std::nullopt; } @@ -1216,8 +1217,8 @@ class masking_dictionary_term_remove_impl { const auto cs_term_escaped = escape_string(make_charset_string_from_arg(ctx, 1)); - if (!global_query_cache::instance().remove(cs_dictionary_escaped, - cs_term_escaped)) { + if (!global_query_cache::instance()->remove(cs_dictionary_escaped, + cs_term_escaped)) { return std::nullopt; } diff --git a/components/masking_functions/src/masking_functions/sys_vars.cpp b/components/masking_functions/src/masking_functions/sys_vars.cpp index 3b4a02d3f313..e569884ce47d 100644 --- a/components/masking_functions/src/masking_functions/sys_vars.cpp +++ b/components/masking_functions/src/masking_functions/sys_vars.cpp @@ -15,23 +15,25 @@ #include "masking_functions/sys_vars.hpp" +#include +#include +#include + #include #include #include #include -#include -#include -#include - -extern REQUIRES_SERVICE_PLACEHOLDER(component_sys_variable_register); -extern REQUIRES_SERVICE_PLACEHOLDER(component_sys_variable_unregister); -extern REQUIRES_SERVICE_PLACEHOLDER(log_builtins); +#include 
"masking_functions/component_sys_variable_service_tuple.hpp" +#include "masking_functions/primitive_singleton.hpp" -namespace masking_functions::sys_vars { namespace { +using global_component_sys_variable_services = + masking_functions::primitive_singleton< + masking_functions::component_sys_variable_service_tuple>; + using str_arg_check_type = STR_CHECK_ARG(str); using ulonglong_arg_check_type = INTEGRAL_CHECK_ARG(ulonglong); @@ -49,18 +51,21 @@ bool is_flush_interval_initialised = false; char *database_name; ulonglong flush_interval_seconds = 0; -} // namespace +} // anonymous namespace + +namespace masking_functions { std::string_view get_dict_database_name() noexcept { return database_name; } -ulonglong get_flush_interval_seconds() noexcept { +std::uint64_t get_flush_interval_seconds() noexcept { return flush_interval_seconds; } bool register_sys_vars() { str_arg_check_type check_db_name{default_database_name.data()}; - if (mysql_service_component_sys_variable_register->register_variable( + const auto &services{global_component_sys_variable_services::instance()}; + if (services.registrator->register_variable( component_name.data(), masking_database_var_name.data(), PLUGIN_VAR_STR | PLUGIN_VAR_MEMALLOC | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -75,7 +80,7 @@ bool register_sys_vars() { ulonglong_arg_check_type check_flush_interval{default_flush_interval_seconds, 0, ULLONG_MAX, 1}; - if (mysql_service_component_sys_variable_register->register_variable( + if (services.registrator->register_variable( component_name.data(), flush_interval_var_name.data(), PLUGIN_VAR_LONGLONG | PLUGIN_VAR_UNSIGNED | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -95,14 +100,15 @@ bool register_sys_vars() { bool unregister_sys_vars() { bool is_success = true; + const auto &services{global_component_sys_variable_services::instance()}; if (is_database_name_initialised && - mysql_service_component_sys_variable_unregister->unregister_variable( + 
services.unregistrator->unregister_variable( component_name.data(), masking_database_var_name.data()) != 0) { is_success = false; } if (is_flush_interval_initialised && - mysql_service_component_sys_variable_unregister->unregister_variable( + services.unregistrator->unregister_variable( component_name.data(), flush_interval_var_name.data()) != 0) { is_success = false; } @@ -110,14 +116,13 @@ bool unregister_sys_vars() { return is_success; } -bool validate() { - if (database_name == nullptr || strlen(database_name) == 0) { - LogComponentErr(ERROR_LEVEL, ER_LOG_PRINTF_MSG, - "Bad masking_functions.masking_database value"); +bool check_sys_vars(std::string &error_message) { + if (database_name == nullptr || std::strlen(database_name) == 0) { + error_message = "Bad masking_functions.masking_database value"; return false; } return true; } -} // namespace masking_functions::sys_vars +} // namespace masking_functions From 6fd48839c5b21f28e0abb7227397336b9d5812b2 Mon Sep 17 00:00:00 2001 From: Yura Sorokin Date: Wed, 24 Apr 2024 19:35:11 +0200 Subject: [PATCH 6/8] PS-9148: Refactored usage of std::string_view for c-interfaces https://perconadev.atlassian.net/browse/PS-9148 As std::string_view::data() is not guaranteed to be null-terminated, it is not safe to use it in old c-functions accepting 'const char *'. Some constants converted to arrays of char 'const char buffer[]{"value"}'. 
--- .../src/masking_functions/query_cache.cpp | 13 +++++----- .../src/masking_functions/sys_vars.cpp | 25 ++++++++----------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/components/masking_functions/src/masking_functions/query_cache.cpp b/components/masking_functions/src/masking_functions/query_cache.cpp index 93901ae4b9df..d1c9d14f83f5 100644 --- a/components/masking_functions/src/masking_functions/query_cache.cpp +++ b/components/masking_functions/src/masking_functions/query_cache.cpp @@ -38,10 +38,9 @@ namespace { using global_command_services = masking_functions::primitive_singleton< masking_functions::command_service_tuple>; -constexpr std::string_view psi_category_name{"masking_functions"}; -constexpr std::string_view flusher_thd_psi_name{ - "masking_functions_dict_flusher"}; -constexpr std::string_view flusher_thd_psi_os_name{"mf_flusher"}; +constexpr const char psi_category_name[]{"masking_functions"}; +constexpr const char flusher_thd_psi_name[]{"masking_functions_dict_flusher"}; +constexpr const char flusher_thd_psi_os_name[]{"mf_flusher"}; } // anonymous namespace @@ -56,12 +55,12 @@ query_cache::query_cache(query_builder_ptr query_builder, if (m_flusher_interval_seconds > 0) { PSI_thread_info thread_info{&m_psi_flusher_thread_key, - flusher_thd_psi_name.data(), - flusher_thd_psi_os_name.data(), + flusher_thd_psi_name, + flusher_thd_psi_os_name, PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME}; - mysql_thread_register(psi_category_name.data(), &thread_info, 1); + mysql_thread_register(psi_category_name, &thread_info, 1); const auto res = mysql_thread_create(m_psi_flusher_thread_key, &m_flusher_thread, diff --git a/components/masking_functions/src/masking_functions/sys_vars.cpp b/components/masking_functions/src/masking_functions/sys_vars.cpp index e569884ce47d..bb54be1d1a00 100644 --- a/components/masking_functions/src/masking_functions/sys_vars.cpp +++ b/components/masking_functions/src/masking_functions/sys_vars.cpp @@ -34,15 +34,12 @@ using 
global_component_sys_variable_services = masking_functions::primitive_singleton< masking_functions::component_sys_variable_service_tuple>; -using str_arg_check_type = STR_CHECK_ARG(str); -using ulonglong_arg_check_type = INTEGRAL_CHECK_ARG(ulonglong); - -constexpr std::string_view component_name{"masking_functions"}; -constexpr std::string_view masking_database_var_name{"masking_database"}; -constexpr std::string_view flush_interval_var_name{ +constexpr const char component_name[]{"masking_functions"}; +constexpr const char masking_database_var_name[]{"masking_database"}; +constexpr const char flush_interval_var_name[]{ "dictionaries_flush_interval_seconds"}; -std::string default_database_name{"mysql"}; +char default_database_name[]{"mysql"}; const ulonglong default_flush_interval_seconds = 0; bool is_database_name_initialised = false; @@ -62,11 +59,11 @@ std::uint64_t get_flush_interval_seconds() noexcept { } bool register_sys_vars() { - str_arg_check_type check_db_name{default_database_name.data()}; + STR_CHECK_ARG(str) check_db_name{default_database_name}; const auto &services{global_component_sys_variable_services::instance()}; if (services.registrator->register_variable( - component_name.data(), masking_database_var_name.data(), + component_name, masking_database_var_name, PLUGIN_VAR_STR | PLUGIN_VAR_MEMALLOC | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Specifies the database to use for data masking dictionaries " @@ -77,11 +74,11 @@ bool register_sys_vars() { } is_database_name_initialised = true; - ulonglong_arg_check_type check_flush_interval{default_flush_interval_seconds, - 0, ULLONG_MAX, 1}; + INTEGRAL_CHECK_ARG(ulonglong) + check_flush_interval{default_flush_interval_seconds, 0, ULLONG_MAX, 1}; if (services.registrator->register_variable( - component_name.data(), flush_interval_var_name.data(), + component_name, flush_interval_var_name, PLUGIN_VAR_LONGLONG | PLUGIN_VAR_UNSIGNED | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Sets the interval, in 
seconds, to wait before attempting to " @@ -103,13 +100,13 @@ bool unregister_sys_vars() { const auto &services{global_component_sys_variable_services::instance()}; if (is_database_name_initialised && services.unregistrator->unregister_variable( - component_name.data(), masking_database_var_name.data()) != 0) { + component_name, masking_database_var_name) != 0) { is_success = false; } if (is_flush_interval_initialised && services.unregistrator->unregister_variable( - component_name.data(), flush_interval_var_name.data()) != 0) { + component_name, flush_interval_var_name) != 0) { is_success = false; } From 4cf7652a939a7b904432fd6fbb38d092b52dc378 Mon Sep 17 00:00:00 2001 From: Yura Sorokin Date: Tue, 30 Apr 2024 01:36:54 +0200 Subject: [PATCH 7/8] PS-9148: implemented lazy query_cache initial population https://perconadev.atlassian.net/browse/PS-9148 'command_service_tuple' struct extended with one more member - 'field_info' service. Reworked 'query_cache' class: instead of loading terms from the database in the constructor, this operation is now performed on the first attempt to access one of the dictionary methods ('contains()' / 'get_random()' / 'remove()' / 'insert()'). This is done in order to overcome a limitation that does not allow 'mysql_command_query' service to be used from inside the component initialization function. Fixed problem with 'm_dict_cache' shared pointer updated concurrently from different threads. Exceptions thrown from the cache loading function no longer escape the flusher thread. De-coupled 'sql_context' and 'bookshelf' classes: 'sql_context' now accepts a generic insertion callback that can be used to populate any type of containers. 'component_masking_functions.dictionary_operations' MTR test case extended with additional checks for flushed / unflushed dictionary cache. 
--- .../command_service_tuple.hpp | 2 + .../include/masking_functions/query_cache.hpp | 12 ++- .../include/masking_functions/sql_context.hpp | 35 ++++++++- .../masking_functions/src/component.cpp | 3 + .../src/masking_functions/query_cache.cpp | 76 ++++++++++++++----- .../registration_routines.cpp | 4 +- .../src/masking_functions/sql_context.cpp | 61 ++++++++------- .../r/dictionary_operations.result | 22 +++++- .../t/dictionary_operations.test | 64 +++++++++++++++- 9 files changed, 222 insertions(+), 57 deletions(-) diff --git a/components/masking_functions/include/masking_functions/command_service_tuple.hpp b/components/masking_functions/include/masking_functions/command_service_tuple.hpp index d465ba2eded6..7c8b63634ed2 100644 --- a/components/masking_functions/include/masking_functions/command_service_tuple.hpp +++ b/components/masking_functions/include/masking_functions/command_service_tuple.hpp @@ -35,6 +35,7 @@ namespace masking_functions { // mysql_command_query{ // mysql_service_mysql_command_query, // mysql_service_mysql_command_query_result, +// mysql_service_mysql_command_field_info, // mysql_service_mysql_command_options, // mysql_service_mysql_command_factory // }; @@ -43,6 +44,7 @@ namespace masking_functions { struct command_service_tuple { SERVICE_TYPE(mysql_command_query) * query; SERVICE_TYPE(mysql_command_query_result) * query_result; + SERVICE_TYPE(mysql_command_field_info) * field_info; SERVICE_TYPE(mysql_command_options) * options; SERVICE_TYPE(mysql_command_factory) * factory; }; diff --git a/components/masking_functions/include/masking_functions/query_cache.hpp b/components/masking_functions/include/masking_functions/query_cache.hpp index d39777c8742b..a3a11f7e89cf 100644 --- a/components/masking_functions/include/masking_functions/query_cache.hpp +++ b/components/masking_functions/include/masking_functions/query_cache.hpp @@ -50,13 +50,15 @@ class query_cache { bool remove(const std::string &dictionary_name, const std::string &term); bool 
insert(const std::string &dictionary_name, const std::string &term); - bool load_cache(); + void reload_cache(); private: - bookshelf_ptr m_dict_cache; - query_builder_ptr m_query_builder; + // TODO: in c++20 change this to std::atomic and + // remove deprecated atomic_load() / atomic_store() + mutable bookshelf_ptr m_dict_cache; + std::uint64_t m_flusher_interval_seconds; std::atomic m_is_flusher_stopped; std::mutex m_flusher_mutex; @@ -72,6 +74,10 @@ class query_cache { void dict_flusher() noexcept; static void *run_dict_flusher(void *arg); + + bookshelf_ptr create_dict_cache_internal() const; + // returning deliberately by value to increase reference counter + bookshelf_ptr get_pinned_dict_cache_internal() const; }; } // namespace masking_functions diff --git a/components/masking_functions/include/masking_functions/sql_context.hpp b/components/masking_functions/include/masking_functions/sql_context.hpp index 37d2d3a67f9e..d5f51fe8f53a 100644 --- a/components/masking_functions/include/masking_functions/sql_context.hpp +++ b/components/masking_functions/include/masking_functions/sql_context.hpp @@ -16,12 +16,13 @@ #ifndef MASKING_FUNCTIONS_SQL_CONTEXT_HPP #define MASKING_FUNCTIONS_SQL_CONTEXT_HPP +#include +#include +#include #include -#include #include #include -#include "masking_functions/bookshelf_fwd.hpp" #include "masking_functions/command_service_tuple_fwd.hpp" namespace masking_functions { @@ -32,6 +33,13 @@ namespace masking_functions { // construction. 
class sql_context { public: + template + using field_value_container = std::array; + + template + using row_callback = + std::function &)>; + explicit sql_context(const command_service_tuple &services); sql_context(sql_context const &) = delete; @@ -46,9 +54,23 @@ class sql_context { return *impl_.get_deleter().services; } - bookshelf_ptr query_list(std::string_view query); + template + void execute_select(std::string_view query, + const row_callback &callback) { + execute_select_internal( + query, NumberOfFields, + [&callback](char **field_values, std::size_t *lengths) { + field_value_container wrapped_field_values; + std::transform(field_values, field_values + NumberOfFields, lengths, + std::begin(wrapped_field_values), + [](char *str, std::size_t len) { + return std::string_view{str, len}; + }); + callback(wrapped_field_values); + }); + } - bool execute(std::string_view query); + bool execute_dml(std::string_view query); private: struct deleter { @@ -57,6 +79,11 @@ class sql_context { }; using impl_type = std::unique_ptr; impl_type impl_; + + using row_internal_callback = std::function; + void execute_select_internal(std::string_view query, + std::size_t number_of_fields, + const row_internal_callback &callback); }; } // namespace masking_functions diff --git a/components/masking_functions/src/component.cpp b/components/masking_functions/src/component.cpp index 6b1a0508df4f..4bb0a6ea386a 100644 --- a/components/masking_functions/src/component.cpp +++ b/components/masking_functions/src/component.cpp @@ -60,6 +60,7 @@ REQUIRES_SERVICE_PLACEHOLDER(mysql_string_compare); REQUIRES_SERVICE_PLACEHOLDER(mysql_command_query); REQUIRES_SERVICE_PLACEHOLDER(mysql_command_query_result); +REQUIRES_SERVICE_PLACEHOLDER(mysql_command_field_info); REQUIRES_SERVICE_PLACEHOLDER(mysql_command_options); REQUIRES_SERVICE_PLACEHOLDER(mysql_command_factory); @@ -118,6 +119,7 @@ static mysql_service_status_t component_init() { // TODO: convert this to designated initializers in c++20 
mysql_service_mysql_command_query, mysql_service_mysql_command_query_result, + mysql_service_mysql_command_field_info, mysql_service_mysql_command_options, mysql_service_mysql_command_factory}; masking_functions::primitive_singleton< @@ -227,6 +229,7 @@ BEGIN_COMPONENT_REQUIRES(CURRENT_COMPONENT_NAME) REQUIRES_SERVICE(mysql_command_query), REQUIRES_SERVICE(mysql_command_query_result), + REQUIRES_SERVICE(mysql_command_field_info), REQUIRES_SERVICE(mysql_command_options), REQUIRES_SERVICE(mysql_command_factory), diff --git a/components/masking_functions/src/masking_functions/query_cache.cpp b/components/masking_functions/src/masking_functions/query_cache.cpp index d1c9d14f83f5..f7fd2406eefa 100644 --- a/components/masking_functions/src/masking_functions/query_cache.cpp +++ b/components/masking_functions/src/masking_functions/query_cache.cpp @@ -49,10 +49,15 @@ namespace masking_functions { query_cache::query_cache(query_builder_ptr query_builder, std::uint64_t flusher_interval_seconds) : m_query_builder{std::move(query_builder)}, + m_dict_cache{}, m_flusher_interval_seconds{flusher_interval_seconds}, m_is_flusher_stopped{true} { - load_cache(); + // we do not initialize m_dict_cache with create_dict_cache_internal() here + // as this constructor is called from the component initialization method + // and any call to mysql_command_query service may mess up with current THD + // the cache will be loaded during the first call to one of the dictionary + // functions or by the flusher thread if (m_flusher_interval_seconds > 0) { PSI_thread_info thread_info{&m_psi_flusher_thread_key, flusher_thd_psi_name, @@ -82,63 +87,62 @@ query_cache::~query_cache() { } } -bool query_cache::load_cache() { - masking_functions::sql_context sql_ctx{global_command_services::instance()}; - auto query = m_query_builder->select_all_from_dictionary(); - auto result = sql_ctx.query_list(query); - - if (result) { - // TODO: in c++20 change to m_dict_cache to std::atomic - 
std::atomic_store(&m_dict_cache, result); +void query_cache::reload_cache() { + auto local_dict_cache{create_dict_cache_internal()}; + if (!local_dict_cache) { + throw std::runtime_error{"Cannot load dictionary cache"}; } - return static_cast(result); + std::atomic_store(&m_dict_cache, local_dict_cache); } bool query_cache::contains(const std::string &dictionary_name, const std::string &term) const { - return m_dict_cache->contains(dictionary_name, term); + return get_pinned_dict_cache_internal()->contains(dictionary_name, term); } optional_string query_cache::get_random( const std::string &dictionary_name) const { - return m_dict_cache->get_random(dictionary_name); + return get_pinned_dict_cache_internal()->get_random(dictionary_name); } bool query_cache::remove(const std::string &dictionary_name) { + auto local_dict_cache{get_pinned_dict_cache_internal()}; masking_functions::sql_context sql_ctx{global_command_services::instance()}; auto query = m_query_builder->delete_for_dictionary(dictionary_name); - if (!sql_ctx.execute(query)) { + if (!sql_ctx.execute_dml(query)) { return false; } - return m_dict_cache->remove(dictionary_name); + return local_dict_cache->remove(dictionary_name); } bool query_cache::remove(const std::string &dictionary_name, const std::string &term) { + auto local_dict_cache{get_pinned_dict_cache_internal()}; masking_functions::sql_context sql_ctx{global_command_services::instance()}; auto query = m_query_builder->delete_for_dictionary_and_term(dictionary_name, term); - if (!sql_ctx.execute(query)) { + if (!sql_ctx.execute_dml(query)) { return false; } - return m_dict_cache->remove(dictionary_name, term); + return local_dict_cache->remove(dictionary_name, term); } bool query_cache::insert(const std::string &dictionary_name, const std::string &term) { + auto local_dict_cache{get_pinned_dict_cache_internal()}; masking_functions::sql_context sql_ctx{global_command_services::instance()}; auto query = 
m_query_builder->insert_ignore_record(dictionary_name, term); - if (!sql_ctx.execute(query)) { + if (!sql_ctx.execute_dml(query)) { return false; } - return m_dict_cache->insert(dictionary_name, term); + return local_dict_cache->insert(dictionary_name, term); } void query_cache::init_thd() noexcept { @@ -177,7 +181,10 @@ void query_cache::dict_flusher() noexcept { }); if (!m_is_flusher_stopped) { - load_cache(); + auto local_dict_cache{create_dict_cache_internal()}; + if (local_dict_cache) { + std::atomic_store(&m_dict_cache, local_dict_cache); + } DBUG_EXECUTE_IF("masking_functions_signal_on_cache_reload", { const char act[] = "now SIGNAL masking_functions_cache_reload_done"; @@ -195,4 +202,35 @@ void *query_cache::run_dict_flusher(void *arg) { return nullptr; } +bookshelf_ptr query_cache::create_dict_cache_internal() const { + bookshelf_ptr result; + try { + masking_functions::sql_context sql_ctx{global_command_services::instance()}; + auto query = m_query_builder->select_all_from_dictionary(); + auto local_dict_cache{std::make_shared()}; + sql_context::row_callback<2> result_inserter{[&terms = *local_dict_cache]( + const auto &field_values) { + terms.insert(std::string{field_values[0]}, std::string{field_values[1]}); + }}; + sql_ctx.execute_select(query, result_inserter); + result = local_dict_cache; + } catch (...) 
{ + } + + return result; +} + +bookshelf_ptr query_cache::get_pinned_dict_cache_internal() const { + auto local_dict_cache{std::atomic_load(&m_dict_cache)}; + if (!local_dict_cache) { + local_dict_cache = create_dict_cache_internal(); + if (!local_dict_cache) { + throw std::runtime_error{"Cannot load dictionary cache"}; + } + std::atomic_store(&m_dict_cache, local_dict_cache); + } + + return local_dict_cache; +} + } // namespace masking_functions diff --git a/components/masking_functions/src/masking_functions/registration_routines.cpp b/components/masking_functions/src/masking_functions/registration_routines.cpp index a9ba416d404d..b8ab6f3dafcc 100644 --- a/components/masking_functions/src/masking_functions/registration_routines.cpp +++ b/components/masking_functions/src/masking_functions/registration_routines.cpp @@ -1061,9 +1061,7 @@ class masking_dictionaries_flush_impl { mysqlpp::udf_result_t calculate(const mysqlpp::udf_context &ctx [[maybe_unused]]) { - if (!global_query_cache::instance()->load_cache()) { - return std::nullopt; - } + global_query_cache::instance()->reload_cache(); return "1"; } diff --git a/components/masking_functions/src/masking_functions/sql_context.cpp b/components/masking_functions/src/masking_functions/sql_context.cpp index 5e0d4a758d54..008740fa1cad 100644 --- a/components/masking_functions/src/masking_functions/sql_context.cpp +++ b/components/masking_functions/src/masking_functions/sql_context.cpp @@ -19,7 +19,6 @@ #include #include -#include "masking_functions/bookshelf.hpp" #include "masking_functions/command_service_tuple.hpp" #include "masking_functions/sql_context.hpp" @@ -82,12 +81,38 @@ sql_context::sql_context(const command_service_tuple &services) } } -bookshelf_ptr sql_context::query_list(std::string_view query) { +bool sql_context::execute_dml(std::string_view query) { + if ((*get_services().query->query)(to_mysql_h(impl_.get()), query.data(), + query.length()) != 0) { + return false; + } + std::uint64_t row_count = 0; + 
if ((*get_services().query->affected_rows)(to_mysql_h(impl_.get()), + &row_count) != 0) { + return false; + } + return row_count > 0; +} + +void sql_context::execute_select_internal( + std::string_view query, std::size_t expected_number_of_fields, + const row_internal_callback &callback) { if ((*get_services().query->query)(to_mysql_h(impl_.get()), query.data(), query.length()) != 0) { throw std::runtime_error{"Error while executing SQL query"}; } + unsigned int actual_number_of_fields; + if ((*get_services().field_info->field_count)( + to_mysql_h(impl_.get()), &actual_number_of_fields) != 0) { + throw std::runtime_error{"Couldn't get number of fields"}; + } + + if (actual_number_of_fields != expected_number_of_fields) { + throw std::runtime_error{ + "Micmatch between actual and expected number of fields"}; + } + MYSQL_RES_H mysql_res = nullptr; if ((*get_services().query_result->store_result)(to_mysql_h(impl_.get()), &mysql_res) != 0) { @@ -106,7 +131,7 @@ bookshelf_ptr sql_context::query_list(std::string_view query) { std::unique_ptr; mysql_res_ptr mysql_res_guard(mysql_res, std::move(mysql_res_deleter)); - uint64_t row_count = 0; + std::uint64_t row_count = 0; // As the 'affected_rows()' method of the 'mysql_command_query' MySQL // service is implementted via 'mysql_affected_rows()' MySQL client // function, it is OK to use it for SELECT statements as well, because @@ -115,35 +140,19 @@ bookshelf_ptr sql_context::query_list(std::string_view query) { &row_count) != 0) throw std::runtime_error{"Couldn't query row count"}; - bookshelf_ptr result{std::make_shared()}; - for (auto i = row_count; i > 0; --i) { - MYSQL_ROW_H row = nullptr; - ulong *length = nullptr; + MYSQL_ROW_H field_values = nullptr; + ulong *field_value_lengths = nullptr; - if ((*get_services().query_result->fetch_row)(mysql_res, &row) != 0) + if ((*get_services().query_result->fetch_row)(mysql_res, &field_values) != + 0) throw std::runtime_error{"Couldn't fetch length"}; - if 
((*get_services().query_result->fetch_lengths)(mysql_res, &length) != 0) + if ((*get_services().query_result->fetch_lengths)( + mysql_res, &field_value_lengths) != 0) throw std::runtime_error{"Couldn't fetch length"}; - result->insert(std::string{row[0], length[0]}, - std::string{row[1], length[1]}); - } - - return result; -} - -bool sql_context::execute(std::string_view query) { - if ((*get_services().query->query)(to_mysql_h(impl_.get()), query.data(), - query.length()) != 0) { - return false; + callback(field_values, field_value_lengths); } - uint64_t row_count = 0; - if ((*get_services().query->affected_rows)(to_mysql_h(impl_.get()), - &row_count) != 0) { - return false; - } - return row_count > 0; } } // namespace masking_functions diff --git a/mysql-test/suite/component_masking_functions/r/dictionary_operations.result b/mysql-test/suite/component_masking_functions/r/dictionary_operations.result index 4cff6e1e1547..6323ed2c60f0 100644 --- a/mysql-test/suite/component_masking_functions/r/dictionary_operations.result +++ b/mysql-test/suite/component_masking_functions/r/dictionary_operations.result @@ -59,6 +59,8 @@ SELECT gen_blocklist('Berlin', 'de_cities', 'us_cities'); ERROR HY000: Error in command service backend interface, because of : "Table 'mysql.masking_dictionaries' doesn't exist" SELECT gen_dictionary('us_cities'); ERROR HY000: Error in command service backend interface, because of : "Table 'mysql.masking_dictionaries' doesn't exist" +SELECT masking_dictionaries_flush(); +ERROR HY000: Error in command service backend interface, because of : "Table 'mysql.masking_dictionaries' doesn't exist" # # NULL for NULL checks include/assert.inc [gen_blocklist() for the NULL primary argument should return NULL] @@ -97,12 +99,30 @@ include/assert.inc [collation of the result of evaluating 'gen_blocklist('Berlin INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city1'); INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city2'); INSERT INTO 
mysql.masking_dictionaries VALUES('us_cities', 'city3'); -INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city4'); INSERT INTO mysql.masking_dictionaries VALUES('укр_міста', 'місто1'); +include/assert.inc [gen_dictionary on a existing but not flushed dictionary must return NULL] +SELECT masking_dictionaries_flush(); +masking_dictionaries_flush() +1 +include/assert.inc [the number of distinct US city names after the first insert and flush must be 3] +INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city4'); INSERT INTO mysql.masking_dictionaries VALUES('укр_міста', 'місто2'); +include/assert.inc [the number of distinct US city names after the second insert but before flush must be 3] +SELECT masking_dictionaries_flush(); +masking_dictionaries_flush() +1 +include/assert.inc [the number of distinct US city names after the second insert and flush must be 4] +RENAME TABLE mysql.masking_dictionaries TO mysql.masking_dictionaries_hidden; +include/assert.inc [the number of distinct US city names after hiding dict table must be 4] +SELECT masking_dictionaries_flush(); +ERROR HY000: Error in command service backend interface, because of : "Table 'mysql.masking_dictionaries' doesn't exist" +include/assert.inc [the number of distinct US city names after dict unsuccessful flush must be 4] +RENAME TABLE mysql.masking_dictionaries_hidden TO mysql.masking_dictionaries; +include/assert.inc [the number of distinct US city names after restoring dict table must be 4] SELECT masking_dictionaries_flush(); masking_dictionaries_flush() 1 +include/assert.inc [the number of distinct US city names after restoring dict table and flush must be 4] include/assert.inc [gen_dictionary on a non-existing dictionary must return NULL] SET @check_expression_result = gen_dictionary('us_cities'); include/assert.inc [the result of evaluating 'gen_dictionary('us_cities')' must match the 'city[[:digit:]]{1}' pattern] diff --git 
a/mysql-test/suite/component_masking_functions/t/dictionary_operations.test b/mysql-test/suite/component_masking_functions/t/dictionary_operations.test index da2dd0655c23..7c80ebd1a54c 100644 --- a/mysql-test/suite/component_masking_functions/t/dictionary_operations.test +++ b/mysql-test/suite/component_masking_functions/t/dictionary_operations.test @@ -83,6 +83,11 @@ SELECT gen_blocklist('Berlin', 'de_cities', 'us_cities'); --error ER_COMMAND_SERVICE_BACKEND_FAILED SELECT gen_dictionary('us_cities'); +--connection con_priv +--error ER_COMMAND_SERVICE_BACKEND_FAILED +SELECT masking_dictionaries_flush(); +--connection con_unpriv + --echo # --echo # NULL for NULL checks @@ -148,14 +153,71 @@ CREATE TABLE mysql.masking_dictionaries( INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city1'); INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city2'); INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city3'); -INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city4'); INSERT INTO mysql.masking_dictionaries VALUES('укр_міста', 'місто1'); + +--let $assert_cond = gen_dictionary("us_cities") IS NULL +--let $assert_text = gen_dictionary on a existing but not flushed dictionary must return NULL +--source include/assert.inc + +--connection con_priv +SELECT masking_dictionaries_flush(); +--connection con_unpriv + +--let $assert_cond = [ SELECT GROUP_CONCAT(val ORDER BY val) = "city1,city2,city3" FROM (SELECT gen_dictionary("us_cities") AS term FROM SEQUENCE_TABLE(100) AS tt GROUP BY term) AS tbl(val) ] = 1 +--let $assert_text = the number of distinct US city names after the first insert and flush must be 3 +--source include/assert.inc + +--connection default +INSERT INTO mysql.masking_dictionaries VALUES('us_cities', 'city4'); INSERT INTO mysql.masking_dictionaries VALUES('укр_міста', 'місто2'); +--connection con_unpriv +--let $assert_cond = [ SELECT GROUP_CONCAT(val ORDER BY val) = "city1,city2,city3" FROM (SELECT 
gen_dictionary("us_cities") AS term FROM SEQUENCE_TABLE(100) AS tt GROUP BY term) AS tbl(val) ] = 1 +--let $assert_text = the number of distinct US city names after the second insert but before flush must be 3 +--source include/assert.inc + --connection con_priv SELECT masking_dictionaries_flush(); --connection con_unpriv +--let $assert_cond = [ SELECT GROUP_CONCAT(val ORDER BY val) = "city1,city2,city3,city4" FROM (SELECT gen_dictionary("us_cities") AS term FROM SEQUENCE_TABLE(100) AS tt GROUP BY term) AS tbl(val) ] = 1 +--let $assert_text = the number of distinct US city names after the second insert and flush must be 4 +--source include/assert.inc + +--connection default +RENAME TABLE mysql.masking_dictionaries TO mysql.masking_dictionaries_hidden; +--connection con_unpriv + +--let $assert_cond = [ SELECT GROUP_CONCAT(val ORDER BY val) = "city1,city2,city3,city4" FROM (SELECT gen_dictionary("us_cities") AS term FROM SEQUENCE_TABLE(100) AS tt GROUP BY term) AS tbl(val) ] = 1 +--let $assert_text = the number of distinct US city names after hiding dict table must be 4 +--source include/assert.inc + +--connection con_priv +--error ER_COMMAND_SERVICE_BACKEND_FAILED +SELECT masking_dictionaries_flush(); +--connection con_unpriv + +--let $assert_cond = [ SELECT GROUP_CONCAT(val ORDER BY val) = "city1,city2,city3,city4" FROM (SELECT gen_dictionary("us_cities") AS term FROM SEQUENCE_TABLE(100) AS tt GROUP BY term) AS tbl(val) ] = 1 +--let $assert_text = the number of distinct US city names after dict unsuccessful flush must be 4 +--source include/assert.inc + +--connection default +RENAME TABLE mysql.masking_dictionaries_hidden TO mysql.masking_dictionaries; +--connection con_unpriv + +--let $assert_cond = [ SELECT GROUP_CONCAT(val ORDER BY val) = "city1,city2,city3,city4" FROM (SELECT gen_dictionary("us_cities") AS term FROM SEQUENCE_TABLE(100) AS tt GROUP BY term) AS tbl(val) ] = 1 +--let $assert_text = the number of distinct US city names after restoring dict table 
must be 4 +--source include/assert.inc + +--connection con_priv +SELECT masking_dictionaries_flush(); +--connection con_unpriv + +--let $assert_cond = [ SELECT GROUP_CONCAT(val ORDER BY val) = "city1,city2,city3,city4" FROM (SELECT gen_dictionary("us_cities") AS term FROM SEQUENCE_TABLE(100) AS tt GROUP BY term) AS tbl(val) ] = 1 +--let $assert_text = the number of distinct US city names after restoring dict table and flush must be 4 +--source include/assert.inc + + --let $assert_cond = gen_dictionary("de_cities") IS NULL --let $assert_text = gen_dictionary on a non-existing dictionary must return NULL --source include/assert.inc From 848a99e54e403a895bb9f5e5da32a1c03f91e8be Mon Sep 17 00:00:00 2001 From: Yura Sorokin Date: Thu, 2 May 2024 13:42:50 +0200 Subject: [PATCH 8/8] PS-9148: Reworked dictionary / bookshelf thread-safety model https://perconadev.atlassian.net/browse/PS-9148 Both 'dictionary' and 'bookshelf' classes no longer include their own 'std::shared_mutex' to protect data. Instead, we now have a single 'std::shared_mutex' at the 'query_cache' level. The return value of the 'get_random()' method in both 'dictionary' and 'bookshelf' classes changed from 'optional_string' to 'std::string_view'. Empty (default constructed) 'std::string_view' is used as an indicator of an unsuccessful operation. 'get_random()' method in the 'query_cache' class still returns a string by value to avoid race conditions. Changed the behaviour of the 'sql_context::execute_dml()' method - it now throws when SQL errors (like "no table found", etc.) occur. 
--- .../include/masking_functions/bookshelf.hpp | 14 +- .../masking_functions/bookshelf_fwd.hpp | 2 +- .../include/masking_functions/dictionary.hpp | 11 +- .../masking_functions/dictionary_fwd.hpp | 6 +- .../include/masking_functions/query_cache.hpp | 36 ++-- .../src/masking_functions/bookshelf.cpp | 78 +++----- .../src/masking_functions/dictionary.cpp | 21 +-- .../src/masking_functions/query_cache.cpp | 172 +++++++++++------- .../registration_routines.cpp | 8 +- .../src/masking_functions/sql_context.cpp | 7 +- 10 files changed, 185 insertions(+), 170 deletions(-) diff --git a/components/masking_functions/include/masking_functions/bookshelf.hpp b/components/masking_functions/include/masking_functions/bookshelf.hpp index 25fb70abb939..7fc7e8abe5d0 100644 --- a/components/masking_functions/include/masking_functions/bookshelf.hpp +++ b/components/masking_functions/include/masking_functions/bookshelf.hpp @@ -18,8 +18,8 @@ #include "masking_functions/bookshelf_fwd.hpp" -#include #include +#include #include #include "masking_functions/dictionary_fwd.hpp" @@ -28,16 +28,18 @@ namespace masking_functions { class bookshelf { public: - bookshelf() = default; + bookshelf(); bookshelf(const dictionary &) = delete; bookshelf(bookshelf &&) = delete; bookshelf &operator=(const bookshelf &) = delete; bookshelf &operator=(bookshelf &&) = delete; + ~bookshelf(); bool contains(const std::string &dictionary_name, const std::string &term) const noexcept; - // returning a copy deliberately for thread safety - optional_string get_random(const std::string &dictionary_name) const noexcept; + // returns empty std::string_view if no such dictionary exists + std::string_view get_random( + const std::string &dictionary_name) const noexcept; bool remove(const std::string &dictionary_name) noexcept; bool remove(const std::string &dictionary_name, const std::string &term) noexcept; @@ -49,10 +51,6 @@ class bookshelf { // transparent_string_like_hash, std::equal_to<>>. 
using dictionary_container = std::unordered_map; dictionary_container dictionaries_; - mutable std::shared_mutex dictionaries_mutex_; - - dictionary_ptr find_dictionary_internal( - const std::string &dictionary_name) const noexcept; }; } // namespace masking_functions diff --git a/components/masking_functions/include/masking_functions/bookshelf_fwd.hpp b/components/masking_functions/include/masking_functions/bookshelf_fwd.hpp index 6b7b869ce553..ea5982d7c6d2 100644 --- a/components/masking_functions/include/masking_functions/bookshelf_fwd.hpp +++ b/components/masking_functions/include/masking_functions/bookshelf_fwd.hpp @@ -22,7 +22,7 @@ namespace masking_functions { class bookshelf; -using bookshelf_ptr = std::shared_ptr; +using bookshelf_ptr = std::unique_ptr; } // namespace masking_functions diff --git a/components/masking_functions/include/masking_functions/dictionary.hpp b/components/masking_functions/include/masking_functions/dictionary.hpp index 1cbcac5027a8..18920fba4e11 100644 --- a/components/masking_functions/include/masking_functions/dictionary.hpp +++ b/components/masking_functions/include/masking_functions/dictionary.hpp @@ -18,8 +18,8 @@ #include "masking_functions/dictionary_fwd.hpp" -#include #include +#include #include namespace masking_functions { @@ -34,9 +34,13 @@ class dictionary { dictionary &operator=(const dictionary &) = delete; dictionary &operator=(dictionary &&) = delete; + ~dictionary() = default; + + bool is_empty() const noexcept { return terms_.empty(); } + bool contains(const std::string &term) const noexcept; - // returning a copy deliberately for thread safety - optional_string get_random() const; + // returns empty std::string_view if the dictionary is empty + std::string_view get_random() const noexcept; bool insert(const std::string &term); bool remove(const std::string &term) noexcept; @@ -46,7 +50,6 @@ class dictionary { // transparent_string_like_hash, std::equal_to<>>. 
using term_container = std::unordered_set; term_container terms_; - mutable std::shared_mutex terms_mutex_; }; } // namespace masking_functions diff --git a/components/masking_functions/include/masking_functions/dictionary_fwd.hpp b/components/masking_functions/include/masking_functions/dictionary_fwd.hpp index 346781b55109..16e4fa0da013 100644 --- a/components/masking_functions/include/masking_functions/dictionary_fwd.hpp +++ b/components/masking_functions/include/masking_functions/dictionary_fwd.hpp @@ -17,16 +17,12 @@ #define MASKING_FUNCTIONS_DICTIONARY_FWD_HPP #include -#include -#include namespace masking_functions { -using optional_string = std::optional; - class dictionary; -using dictionary_ptr = std::shared_ptr; +using dictionary_ptr = std::unique_ptr; } // namespace masking_functions diff --git a/components/masking_functions/include/masking_functions/query_cache.hpp b/components/masking_functions/include/masking_functions/query_cache.hpp index a3a11f7e89cf..70fdbb51da84 100644 --- a/components/masking_functions/include/masking_functions/query_cache.hpp +++ b/components/masking_functions/include/masking_functions/query_cache.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,7 @@ namespace masking_functions { class query_cache { public: + // passing unique_ptr by value to transfer ownership query_cache(query_builder_ptr query_builder, std::uint64_t flusher_interval_seconds); query_cache(const query_cache &other) = delete; @@ -45,7 +47,9 @@ class query_cache { bool contains(const std::string &dictionary_name, const std::string &term) const; - optional_string get_random(const std::string &dictionary_name) const; + // returns a copy of the string to avoid race conditions + // an empty string is returned if the dictionary does not exist + std::string get_random(const std::string &dictionary_name) const; bool remove(const std::string &dictionary_name); bool remove(const std::string &dictionary_name, const std::string 
&term); bool insert(const std::string &dictionary_name, const std::string &term); @@ -53,21 +57,20 @@ class query_cache { void reload_cache(); private: - query_builder_ptr m_query_builder; + query_builder_ptr dict_query_builder_; - // TODO: in c++20 change this to std::atomic and - // remove deprecated atomic_load() / atomic_store() - mutable bookshelf_ptr m_dict_cache; + mutable bookshelf_ptr dict_cache_; + mutable std::shared_mutex dict_cache_mutex_; - std::uint64_t m_flusher_interval_seconds; - std::atomic m_is_flusher_stopped; - std::mutex m_flusher_mutex; - std::condition_variable m_flusher_condition_var; + std::uint64_t flusher_interval_seconds_; + std::atomic is_flusher_stopped_; + std::mutex flusher_mutex_; + std::condition_variable flusher_condition_var_; - PSI_thread_key m_psi_flusher_thread_key; - my_thread_handle m_flusher_thread; - my_thread_attr_t m_flusher_thread_attr; - std::unique_ptr m_flusher_thd; + PSI_thread_key psi_flusher_thread_key_; + my_thread_handle flusher_thread_; + my_thread_attr_t flusher_thread_attr_; + std::unique_ptr flusher_thd_; void init_thd() noexcept; void release_thd() noexcept; @@ -76,8 +79,11 @@ class query_cache { static void *run_dict_flusher(void *arg); bookshelf_ptr create_dict_cache_internal() const; - // returning deliberately by value to increase reference counter - bookshelf_ptr get_pinned_dict_cache_internal() const; + using shared_lock_type = std::shared_lock; + using unique_lock_type = std::unique_lock; + const bookshelf &acquire_dict_cache_shared( + shared_lock_type &read_lock, unique_lock_type &write_lock) const; + bookshelf &acquire_dict_cache_unique(unique_lock_type &write_lock) const; }; } // namespace masking_functions diff --git a/components/masking_functions/src/masking_functions/bookshelf.cpp b/components/masking_functions/src/masking_functions/bookshelf.cpp index 6685bd34aea0..838d90eae393 100644 --- a/components/masking_functions/src/masking_functions/bookshelf.cpp +++ 
b/components/masking_functions/src/masking_functions/bookshelf.cpp @@ -15,82 +15,64 @@ #include "masking_functions/bookshelf.hpp" -#include -#include -#include - #include "masking_functions/dictionary.hpp" namespace masking_functions { +bookshelf::bookshelf() = default; +bookshelf::~bookshelf() = default; + bool bookshelf::contains(const std::string &dictionary_name, const std::string &term) const noexcept { - const auto dict{find_dictionary_internal(dictionary_name)}; - if (!dict) { + const auto dictionary_it{dictionaries_.find(dictionary_name)}; + if (dictionary_it == std::cend(dictionaries_)) { return false; } - return dict->contains(term); + return dictionary_it->second->contains(term); } -// returning a copy deliberately for thread safety -optional_string bookshelf::get_random( +std::string_view bookshelf::get_random( const std::string &dictionary_name) const noexcept { - const auto dict{find_dictionary_internal(dictionary_name)}; - if (!dict) { - return std::nullopt; + const auto dictionary_it{dictionaries_.find(dictionary_name)}; + if (dictionary_it == std::cend(dictionaries_)) { + return {}; } - return dict->get_random(); + return dictionary_it->second->get_random(); } bool bookshelf::remove(const std::string &dictionary_name) noexcept { - std::unique_lock dictionaries_write_lock{dictionaries_mutex_}; return dictionaries_.erase(dictionary_name) != 0U; } bool bookshelf::remove(const std::string &dictionary_name, const std::string &term) noexcept { - const auto dict{find_dictionary_internal(dictionary_name)}; - if (!dict) { + const auto dictionary_it{dictionaries_.find(dictionary_name)}; + if (dictionary_it == std::end(dictionaries_)) { return false; } - return dict->remove(term); - // after this operation we may have a dictionary with no terms in it - - // it is fine and much safer than trying to re-acquire a write lock and - // removing the dictionary from the bookshelf when it has 0 terms. 
+ const auto result{dictionary_it->second->remove(term)}; + if (dictionary_it->second->is_empty()) { + dictionaries_.erase(dictionary_it); + } + return result; } bool bookshelf::insert(const std::string &dictionary_name, const std::string &term) { - auto dict{find_dictionary_internal(dictionary_name)}; - if (dict) { - return dict->insert(term); + // here we use try_emplace as a combined version of find and + // insert + const auto [dictionary_it, + inserted]{dictionaries_.try_emplace(dictionary_name)}; + if (!inserted) { + return dictionary_it->second->insert(term); } - - // if no dictionary with such name alteady exist, we need to - // create it under a write lock - { - std::unique_lock dictionaries_write_lock{dictionaries_mutex_}; - // it may happen that between the read and write locks another thread - // already created the dictionary with the same name - checking again - dict = std::make_shared(term); - const auto [dictionary_it, - inserted]{dictionaries_.emplace(dictionary_name, dict)}; - if (inserted) { - return true; - } - dict = dictionary_it->second; - } - return dict->insert(term); -} - -dictionary_ptr bookshelf::find_dictionary_internal( - const std::string &dictionary_name) const noexcept { - std::shared_lock dictionaries_read_lock{dictionaries_mutex_}; - const auto dictionary_it{dictionaries_.find(dictionary_name)}; - if (dictionary_it == std::cend(dictionaries_)) { - return {}; + try { + dictionary_it->second = std::make_unique(term); + } catch (...) 
{ + dictionaries_.erase(dictionary_it); + throw; } - return dictionary_it->second; + return true; } } // namespace masking_functions diff --git a/components/masking_functions/src/masking_functions/dictionary.cpp b/components/masking_functions/src/masking_functions/dictionary.cpp index 9d7ddc90bc2c..b6ab1f2d9267 100644 --- a/components/masking_functions/src/masking_functions/dictionary.cpp +++ b/components/masking_functions/src/masking_functions/dictionary.cpp @@ -18,38 +18,33 @@ #include "masking_functions/random_string_generators.hpp" #include -#include -#include namespace masking_functions { -dictionary::dictionary(const std::string &term) : terms_{}, terms_mutex_{} { - terms_.emplace(term); -} +dictionary::dictionary(const std::string &term) + : // here we use std::unordered_set iterator range constructor with + // single 'term' element converted to a fake range + terms_{&term, std::next(&term)} {} bool dictionary::contains(const std::string &term) const noexcept { - std::shared_lock terms_read_lock{terms_mutex_}; + // TODO: in c++20 change to terms_.contains(term) return terms_.count(term) > 0U; } -optional_string dictionary::get_random() const { - std::shared_lock terms_read_lock{terms_mutex_}; - +std::string_view dictionary::get_random() const noexcept { if (terms_.empty()) { - return std::nullopt; + return {}; } const auto random_index{random_number(0, terms_.size() - 1U)}; - return *std::next(terms_.begin(), random_index); + return *std::next(std::begin(terms_), random_index); } bool dictionary::insert(const std::string &term) { - std::unique_lock terms_write_lock{terms_mutex_}; return terms_.emplace(term).second; } bool dictionary::remove(const std::string &term) noexcept { - std::unique_lock terms_write_lock{terms_mutex_}; return terms_.erase(term) > 0U; } diff --git a/components/masking_functions/src/masking_functions/query_cache.cpp b/components/masking_functions/src/masking_functions/query_cache.cpp index f7fd2406eefa..2bfa2311d774 100644 --- 
a/components/masking_functions/src/masking_functions/query_cache.cpp +++ b/components/masking_functions/src/masking_functions/query_cache.cpp @@ -48,18 +48,19 @@ namespace masking_functions { query_cache::query_cache(query_builder_ptr query_builder, std::uint64_t flusher_interval_seconds) - : m_query_builder{std::move(query_builder)}, - m_dict_cache{}, - m_flusher_interval_seconds{flusher_interval_seconds}, - m_is_flusher_stopped{true} { + : dict_query_builder_{std::move(query_builder)}, + dict_cache_{}, + dict_cache_mutex_{}, + flusher_interval_seconds_{flusher_interval_seconds}, + is_flusher_stopped_{true} { // we do not initialize m_dict_cache with create_dict_cache_internal() here // as this constructor is called from the component initialization method // and any call to mysql_command_query service may mess up with current THD // the cache will be loaded during the first call to one of the dictionary // functions or by the flusher thread - if (m_flusher_interval_seconds > 0) { - PSI_thread_info thread_info{&m_psi_flusher_thread_key, + if (flusher_interval_seconds_ > 0) { + PSI_thread_info thread_info{&psi_flusher_thread_key_, flusher_thd_psi_name, flusher_thd_psi_os_name, PSI_FLAG_SINGLETON, @@ -68,81 +69,102 @@ query_cache::query_cache(query_builder_ptr query_builder, mysql_thread_register(psi_category_name, &thread_info, 1); const auto res = - mysql_thread_create(m_psi_flusher_thread_key, &m_flusher_thread, - &m_flusher_thread_attr, run_dict_flusher, this); + mysql_thread_create(psi_flusher_thread_key_, &flusher_thread_, + &flusher_thread_attr_, run_dict_flusher, this); if (res != 0) { LogComponentErr(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "Cannot initialize dictionary flusher"); } else { - m_is_flusher_stopped = false; + is_flusher_stopped_ = false; } } } query_cache::~query_cache() { - if (!m_is_flusher_stopped) { - m_is_flusher_stopped = true; - m_flusher_condition_var.notify_one(); + if (!is_flusher_stopped_) { + is_flusher_stopped_ = true; + 
flusher_condition_var_.notify_one(); } } -void query_cache::reload_cache() { - auto local_dict_cache{create_dict_cache_internal()}; - if (!local_dict_cache) { - throw std::runtime_error{"Cannot load dictionary cache"}; - } - - std::atomic_store(&m_dict_cache, local_dict_cache); -} - bool query_cache::contains(const std::string &dictionary_name, const std::string &term) const { - return get_pinned_dict_cache_internal()->contains(dictionary_name, term); + shared_lock_type read_lock{}; + unique_lock_type write_lock{}; + const auto &acquired_dict_cache{ + acquire_dict_cache_shared(read_lock, write_lock)}; + return acquired_dict_cache.contains(dictionary_name, term); } -optional_string query_cache::get_random( - const std::string &dictionary_name) const { - return get_pinned_dict_cache_internal()->get_random(dictionary_name); +std::string query_cache::get_random(const std::string &dictionary_name) const { + shared_lock_type read_lock{}; + unique_lock_type write_lock{}; + const auto &acquired_dict_cache{ + acquire_dict_cache_shared(read_lock, write_lock)}; + return std::string{acquired_dict_cache.get_random(dictionary_name)}; } bool query_cache::remove(const std::string &dictionary_name) { - auto local_dict_cache{get_pinned_dict_cache_internal()}; masking_functions::sql_context sql_ctx{global_command_services::instance()}; - auto query = m_query_builder->delete_for_dictionary(dictionary_name); + auto query{dict_query_builder_->delete_for_dictionary(dictionary_name)}; - if (!sql_ctx.execute_dml(query)) { - return false; - } + unique_lock_type write_lock{}; + auto &acquired_dict_cache{acquire_dict_cache_unique(write_lock)}; - return local_dict_cache->remove(dictionary_name); + // there is a chance that a user can delete the dictionary from the + // dictionary table directly (not via UDF function) and execute_dml() + // will return false here, whereas cache operation will return true - + // this is why we rely only on the result of the cache operation + 
sql_ctx.execute_dml(query); + return acquired_dict_cache.remove(dictionary_name); } bool query_cache::remove(const std::string &dictionary_name, const std::string &term) { - auto local_dict_cache{get_pinned_dict_cache_internal()}; masking_functions::sql_context sql_ctx{global_command_services::instance()}; - auto query = - m_query_builder->delete_for_dictionary_and_term(dictionary_name, term); + auto query{dict_query_builder_->delete_for_dictionary_and_term( + dictionary_name, term)}; - if (!sql_ctx.execute_dml(query)) { - return false; - } + unique_lock_type write_lock{}; + auto &acquired_dict_cache{acquire_dict_cache_unique(write_lock)}; - return local_dict_cache->remove(dictionary_name, term); + // similarly to another remove() method, we ignore the result of the + // sql operation and rely only on the result of the cache modification + sql_ctx.execute_dml(query); + return acquired_dict_cache.remove(dictionary_name, term); } bool query_cache::insert(const std::string &dictionary_name, const std::string &term) { - auto local_dict_cache{get_pinned_dict_cache_internal()}; masking_functions::sql_context sql_ctx{global_command_services::instance()}; - auto query = m_query_builder->insert_ignore_record(dictionary_name, term); + auto query{dict_query_builder_->insert_ignore_record(dictionary_name, term)}; + + unique_lock_type write_lock{}; + auto &acquired_dict_cache{acquire_dict_cache_unique(write_lock)}; + + // here, as cache insert may throw, we start the 2-phase operation + // with this cache insert because it can be easily reversed without throwing + const auto result{acquired_dict_cache.insert(dictionary_name, term)}; + try { + sql_ctx.execute_dml(query); + } catch (...) 
{ + dict_cache_->remove(dictionary_name, term); + throw; + } + + return result; +} - if (!sql_ctx.execute_dml(query)) { - return false; +void query_cache::reload_cache() { + unique_lock_type dict_cache_write_lock{dict_cache_mutex_}; + + auto local_dict_cache{create_dict_cache_internal()}; + if (!local_dict_cache) { + throw std::runtime_error{"Cannot load dictionary cache"}; } - return local_dict_cache->insert(dictionary_name, term); + dict_cache_ = std::move(local_dict_cache); } void query_cache::init_thd() noexcept { @@ -151,7 +173,7 @@ void query_cache::init_thd() noexcept { thd->set_new_thread_id(); thd->thread_stack = reinterpret_cast(&thd); thd->store_globals(); - m_flusher_thd.reset(thd); + flusher_thd_.reset(thd); } void query_cache::release_thd() noexcept { my_thread_end(); } @@ -159,31 +181,34 @@ void query_cache::release_thd() noexcept { my_thread_end(); } void query_cache::dict_flusher() noexcept { #ifdef HAVE_PSI_THREAD_INTERFACE { - struct PSI_thread *psi = m_flusher_thd->get_psi(); - PSI_THREAD_CALL(set_thread_id)(psi, m_flusher_thd->thread_id()); - PSI_THREAD_CALL(set_thread_THD)(psi, m_flusher_thd.get()); - PSI_THREAD_CALL(set_thread_command)(m_flusher_thd->get_command()); + struct PSI_thread *psi = flusher_thd_->get_psi(); + PSI_THREAD_CALL(set_thread_id)(psi, flusher_thd_->thread_id()); + PSI_THREAD_CALL(set_thread_THD)(psi, flusher_thd_.get()); + PSI_THREAD_CALL(set_thread_command)(flusher_thd_->get_command()); PSI_THREAD_CALL(set_thread_info) (STRING_WITH_LEN("Masking functions component cache flusher")); } #endif - while (!m_is_flusher_stopped) { - std::unique_lock lock{m_flusher_mutex}; + while (!is_flusher_stopped_) { + std::unique_lock lock{flusher_mutex_}; const auto wait_started_at = std::chrono::system_clock::now(); - m_flusher_condition_var.wait_for( - lock, std::chrono::seconds{m_flusher_interval_seconds}, + flusher_condition_var_.wait_for( + lock, std::chrono::seconds{flusher_interval_seconds_}, [this, wait_started_at] { return 
std::chrono::duration_cast( std::chrono::system_clock::now() - wait_started_at) >= - std::chrono::seconds{m_flusher_interval_seconds} || - m_is_flusher_stopped.load(); + std::chrono::seconds{flusher_interval_seconds_} || + is_flusher_stopped_.load(); }); - if (!m_is_flusher_stopped) { - auto local_dict_cache{create_dict_cache_internal()}; - if (local_dict_cache) { - std::atomic_store(&m_dict_cache, local_dict_cache); + if (!is_flusher_stopped_) { + { + unique_lock_type dict_cache_write_lock{dict_cache_mutex_}; + auto local_dict_cache{create_dict_cache_internal()}; + if (local_dict_cache) { + dict_cache_ = std::move(local_dict_cache); + } } DBUG_EXECUTE_IF("masking_functions_signal_on_cache_reload", { @@ -206,31 +231,42 @@ bookshelf_ptr query_cache::create_dict_cache_internal() const { bookshelf_ptr result; try { masking_functions::sql_context sql_ctx{global_command_services::instance()}; - auto query = m_query_builder->select_all_from_dictionary(); - auto local_dict_cache{std::make_shared()}; + auto query{dict_query_builder_->select_all_from_dictionary()}; + auto local_dict_cache{std::make_unique()}; sql_context::row_callback<2> result_inserter{[&terms = *local_dict_cache]( const auto &field_values) { terms.insert(std::string{field_values[0]}, std::string{field_values[1]}); }}; sql_ctx.execute_select(query, result_inserter); - result = local_dict_cache; + result = std::move(local_dict_cache); } catch (...) 
{ } return result; } -bookshelf_ptr query_cache::get_pinned_dict_cache_internal() const { - auto local_dict_cache{std::atomic_load(&m_dict_cache)}; - if (!local_dict_cache) { - local_dict_cache = create_dict_cache_internal(); +const bookshelf &query_cache::acquire_dict_cache_shared( + shared_lock_type &read_lock, unique_lock_type &write_lock) const { + read_lock = shared_lock_type{dict_cache_mutex_}; + if (!dict_cache_) { + // upgrading to a unique_lock + read_lock.unlock(); + acquire_dict_cache_unique(write_lock); + } + return *dict_cache_; +} + +bookshelf &query_cache::acquire_dict_cache_unique( + unique_lock_type &write_lock) const { + write_lock = unique_lock_type{dict_cache_mutex_}; + if (!dict_cache_) { + auto local_dict_cache{create_dict_cache_internal()}; if (!local_dict_cache) { throw std::runtime_error{"Cannot load dictionary cache"}; } - std::atomic_store(&m_dict_cache, local_dict_cache); + dict_cache_ = std::move(local_dict_cache); } - - return local_dict_cache; + return *dict_cache_; } } // namespace masking_functions diff --git a/components/masking_functions/src/masking_functions/registration_routines.cpp b/components/masking_functions/src/masking_functions/registration_routines.cpp index b8ab6f3dafcc..bb857dd9ff38 100644 --- a/components/masking_functions/src/masking_functions/registration_routines.cpp +++ b/components/masking_functions/src/masking_functions/registration_routines.cpp @@ -976,9 +976,9 @@ class gen_blocklist_impl { auto sresult = global_query_cache::instance()->get_random(cs_dict_b_escaped); - if (sresult && !sresult->empty()) { + if (!sresult.empty()) { masking_functions::charset_string utf8_result{ - global_string_services::instance(), *sresult, + global_string_services::instance(), sresult, masking_functions::charset_string::utf8mb4_collation_name}; masking_functions::charset_string conversion_buffer; const auto &cs_result = masking_functions::smart_convert_to_collation( @@ -1022,8 +1022,8 @@ class gen_dictionary_impl { auto sresult 
= global_query_cache::instance()->get_random(cs_dictionary_escaped); - if (sresult && !sresult->empty()) { - return *sresult; + if (!sresult.empty()) { + return sresult; } return std::nullopt; diff --git a/components/masking_functions/src/masking_functions/sql_context.cpp b/components/masking_functions/src/masking_functions/sql_context.cpp index 008740fa1cad..cff38e85771a 100644 --- a/components/masking_functions/src/masking_functions/sql_context.cpp +++ b/components/masking_functions/src/masking_functions/sql_context.cpp @@ -15,7 +15,6 @@ #include #include -#include #include #include @@ -84,12 +83,12 @@ sql_context::sql_context(const command_service_tuple &services) bool sql_context::execute_dml(std::string_view query) { if ((*get_services().query->query)(to_mysql_h(impl_.get()), query.data(), query.length()) != 0) { - return false; + throw std::runtime_error{"Error while executing SQL DML query"}; } std::uint64_t row_count = 0; if ((*get_services().query->affected_rows)(to_mysql_h(impl_.get()), &row_count) != 0) { - return false; + throw std::runtime_error{"Couldn't get number of affected rows"}; } return row_count > 0; } @@ -99,7 +98,7 @@ void sql_context::execute_select_internal( const row_internal_callback &callback) { if ((*get_services().query->query)(to_mysql_h(impl_.get()), query.data(), query.length()) != 0) { - throw std::runtime_error{"Error while executing SQL query"}; + throw std::runtime_error{"Error while executing SQL select query"}; } unsigned int actual_number_of_fields;