Skip to content

Commit

Permalink
PS-9148: Add caching of dictionary table for component_masking_functions
Browse files Browse the repository at this point in the history
https://perconadev.atlassian.net/browse/PS-9148

- Added caching of mysql.masking_dictionaries table content.
- Implemented the masking_dictionaries_flush() UDF, which reloads data
  from the masking dictionaries table into the in-memory cache.
  • Loading branch information
oleksandr-kachan committed Mar 27, 2024
1 parent 6ec7af2 commit 21035cd
Show file tree
Hide file tree
Showing 10 changed files with 388 additions and 152 deletions.
2 changes: 2 additions & 0 deletions components/masking_functions/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set(DATAMASKING_SOURCES
src/masking_functions/charset_string.cpp
src/masking_functions/charset_string_operations.cpp
src/masking_functions/query_builder.cpp
src/masking_functions/query_cache.cpp
src/masking_functions/random_string_generators.cpp
src/masking_functions/registration_routines.cpp
src/masking_functions/sql_context.cpp
Expand All @@ -41,6 +42,7 @@ set(DATAMASKING_SOURCES
include/masking_functions/command_service_tuple.hpp
include/masking_functions/primitive_singleton.hpp
include/masking_functions/query_builder.hpp
include/masking_functions/query_cache.hpp
include/masking_functions/random_string_generators.hpp
include/masking_functions/registration_routines.hpp
include/masking_functions/sql_context.hpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
#include <string>
#include <string_view>

#include "masking_functions/charset_string_fwd.hpp"

namespace masking_functions {

// A helper class which allows to easily construct SQL-statements necessary
Expand Down Expand Up @@ -56,26 +54,18 @@ class query_builder {
return term_field_name_;
}

std::string select_random_term_for_dictionary(
const charset_string &dictionary_name) const {
return select_term_for_dictionary_internal(dictionary_name, nullptr);
}
std::string check_term_presence_in_dictionary(
const charset_string &dictionary_name, const charset_string &term) const {
return select_term_for_dictionary_internal(dictionary_name, &term);
}
std::string select_all_from_dictionary() const;

std::string insert_ignore_record(const charset_string &dictionary_name,
const charset_string &term) const;
std::string insert_ignore_record(const std::string &dictionary_name,
const std::string &term) const;

std::string delete_for_dictionary(
const charset_string &dictionary_name) const {
std::string delete_for_dictionary(const std::string &dictionary_name) const {
return delete_for_dictionary_and_opt_term_internal(dictionary_name,
nullptr);
}

std::string delete_for_dictionary_and_term(
const charset_string &dictionary_name, const charset_string &term) const {
std::string delete_for_dictionary_and_term(const std::string &dictionary_name,
const std::string &term) const {
return delete_for_dictionary_and_opt_term_internal(dictionary_name, &term);
}

Expand All @@ -85,13 +75,8 @@ class query_builder {
std::string dictionary_field_name_;
std::string term_field_name_;

std::string select_term_for_dictionary_internal(
const charset_string &dictionary_name,
const charset_string *opt_term) const;

std::string delete_for_dictionary_and_opt_term_internal(
const charset_string &dictionary_name,
const charset_string *opt_term) const;
const std::string &dictionary_name, const std::string *opt_term) const;
};

} // namespace masking_functions
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */

#ifndef MASKING_FUNCTIONS_QUERY_CACHE_HPP
#define MASKING_FUNCTIONS_QUERY_CACHE_HPP

#include <map>
#include <string>

#include "masking_functions/sql_context.hpp"

namespace masking_functions {

// In-memory cache of the (dictionary name, term) pairs stored in the masking
// dictionaries table. Read operations are answered from the cached data;
// modifying operations execute the corresponding SQL statement first and
// touch the cache only when that statement succeeds.
//
// NOTE(review): no synchronization is visible in this class - confirm that
// callers serialize concurrent access to a shared instance.
class query_cache {
public:
// Constructs the cache and immediately attempts to populate it by calling
// load_cache(); the outcome of that call is not reported to the caller.
query_cache();

// Returns false when no terms are cached for 'dictionary_name'. Otherwise
// returns true if 'term' is empty or if 'term' equals one of the cached
// terms of that dictionary.
bool check_term_presence_in_dictionary(const std::string &dictionary_name,
const std::string &term) const;

// Returns one randomly chosen cached term of 'dictionary_name', or an empty
// optional when no terms are cached for that dictionary.
sql_context::optional_string select_random_term_for_dictionary(
const std::string &dictionary_name) const;

// Executes a DELETE for the whole dictionary; on success also drops all of
// its terms from the cache. Returns whether the statement succeeded.
bool delete_for_dictionary(const std::string &dictionary_name);

// Executes a DELETE for a single (dictionary, term) pair; on success also
// removes the matching term from the cache. Returns whether the statement
// succeeded.
bool delete_for_dictionary_and_term(const std::string &dictionary_name,
const std::string &term);

// Executes an INSERT IGNORE for the (dictionary, term) pair; on success also
// records the pair in the cache. Returns whether the statement succeeded.
bool insert_ignore_record(const std::string &dictionary_name,
const std::string &term);
// Re-reads every row of the dictionaries table. When the query yields a
// result the cache content is replaced with it and true is returned;
// otherwise the current cache content is kept and false is returned.
bool load_cache();

private:
// Multimap keyed by dictionary name; each mapped value is one term.
sql_context::dict_container_type m_dict_cache;
};

} // namespace masking_functions

#endif // MASKING_FUNCTIONS_QUERY_CACHE_HPP
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#ifndef MASKING_FUNCTIONS_SQL_CONTEXT_HPP
#define MASKING_FUNCTIONS_SQL_CONTEXT_HPP

#include <map>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
Expand All @@ -30,6 +32,8 @@ namespace masking_functions {
// construction.
class sql_context {
public:
using dict_container_type = std::multimap<std::string, std::string>;
using optional_dict_container = std::optional<dict_container_type>;
using optional_string = std::optional<std::string>;

explicit sql_context(const command_service_tuple &services);
Expand All @@ -46,9 +50,7 @@ class sql_context {
return *impl_.get_deleter().services;
}

// Executes a query where we either expect a single result (one row one
// column), or nothing
optional_string query_single_value(std::string_view query);
optional_dict_container query_list(std::string_view query);

bool execute(std::string_view query);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,52 +17,40 @@

#include "masking_functions/query_builder.hpp"

#include "masking_functions/sql_escape_functions.hpp"

namespace masking_functions {

std::string query_builder::insert_ignore_record(
const charset_string &dictionary_name, const charset_string &term) const {
const std::string &dictionary_name, const std::string &term) const {
std::ostringstream oss;
oss << "INSERT IGNORE INTO " << get_database_name() << '.' << get_table_name()
<< " (" << get_dictionary_field_name() << ", " << get_term_field_name()
<< ')' << " VALUES('" << escape_string(dictionary_name) << "', '"
<< escape_string(term) << "')";
<< ')' << " VALUES('" << dictionary_name << "', '" << term << "')";
return oss.str();
}

std::string query_builder::select_term_for_dictionary_internal(
const charset_string &dictionary_name,
const charset_string *opt_term) const {
std::string query_builder::select_all_from_dictionary() const {
std::ostringstream oss;
// In our implementation there is no requirement that the `Term` field in
// the `mysql.masking_dictionaries` table must be in `utf8mb4`. So, by
// adding CONVERT(Term USING utf8mb4) we support other character sets in
// the underlying table as well.
oss << "SELECT "
<< "CONVERT(" << get_dictionary_field_name() << " USING "
<< default_result_character_set << "), "
<< "CONVERT(" << get_term_field_name() << " USING "
<< default_result_character_set << ") FROM " << get_database_name() << '.'
<< get_table_name() << " WHERE " << get_dictionary_field_name() << " = '"
<< escape_string(dictionary_name) << '\'';
if (opt_term != nullptr) {
oss << " AND " << get_term_field_name() << " = '"
<< escape_string(*opt_term) << '\'';
} else {
oss << " ORDER BY RAND() LIMIT 1";
}
<< get_table_name();
return oss.str();
}

std::string query_builder::delete_for_dictionary_and_opt_term_internal(
const charset_string &dictionary_name,
const charset_string *opt_term) const {
const std::string &dictionary_name, const std::string *opt_term) const {
std::ostringstream oss;
oss << "DELETE FROM " << get_database_name() << '.' << get_table_name()
<< " WHERE " << get_dictionary_field_name() << " = '"
<< escape_string(dictionary_name) << '\'';
<< " WHERE " << get_dictionary_field_name() << " = '" << dictionary_name
<< '\'';
if (opt_term != nullptr) {
oss << " AND " << get_term_field_name() << " = '"
<< escape_string(*opt_term) << '\'';
oss << " AND " << get_term_field_name() << " = '" << *opt_term << '\'';
}
return oss.str();
}
Expand Down
133 changes: 133 additions & 0 deletions components/masking_functions/src/masking_functions/query_cache.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/* Copyright (c) 2024 Percona LLC and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */

#include "masking_functions/query_cache.hpp"

#include <algorithm>
#include <iterator>
#include <random>

#include "masking_functions/command_service_tuple.hpp"
#include "masking_functions/primitive_singleton.hpp"
#include "masking_functions/query_builder.hpp"

namespace masking_functions {
namespace {

// File-local shorthands for the process-wide singletons used by the cache:
// each is accessed through its static instance() member in the functions
// below.

// Singleton holding the command service tuple that is passed to every
// sql_context created in this file.
using global_command_services = masking_functions::primitive_singleton<
masking_functions::command_service_tuple>;
// Singleton holding the query_builder that produces the SQL statement text.
using global_query_builder =
masking_functions::primitive_singleton<masking_functions::query_builder>;

} // namespace

// Populates the cache from the dictionaries table at construction time.
// The result of the load is intentionally not inspected: when loading does
// not succeed the cache simply starts out empty.
query_cache::query_cache() {
  static_cast<void>(load_cache());
}

bool query_cache::load_cache() {
auto query = global_query_builder::instance().select_all_from_dictionary();
auto result =
masking_functions::sql_context{global_command_services::instance()}
.query_list(query);

if (result.has_value()) {
m_dict_cache.swap(result.value());
}

return result.has_value();
}

// Checks the in-memory cache for 'term' within 'dictionary_name'.
//
// Returns false when no terms are cached for the dictionary. An empty
// 'term' acts as a "does this dictionary exist" probe and yields true as
// soon as the dictionary has at least one cached term. Otherwise the result
// tells whether some cached term of the dictionary compares equal to 'term'
// (plain std::string equality).
//
// The lookup is served entirely from the cache, so no sql_context is
// created here: constructing one per call would add work to every lookup
// without contributing anything to the result.
bool query_cache::check_term_presence_in_dictionary(
    const std::string &dictionary_name, const std::string &term) const {
  const auto [first, last] = m_dict_cache.equal_range(dictionary_name);

  if (first == last) {
    return false;
  }

  if (term.empty()) {
    return true;
  }

  // Capture by reference: the lambda does not outlive this call, so there
  // is no need to copy the term for every lookup.
  return std::any_of(first, last, [&term](const auto &entry) {
    return entry.second == term;
  });
}

// Picks one cached term of 'dictionary_name' uniformly at random.
//
// Returns an empty optional when no terms are cached for the dictionary;
// otherwise returns a copy of the selected term.
//
// The index is drawn with <random> rather than rand(): rand() is limited to
// RAND_MAX (which may be as small as 32767, making later terms of a large
// dictionary unreachable), 'rand() % n' is biased, and rand() is not
// guaranteed to be thread-safe. A thread_local engine gives every calling
// thread its own generator state.
sql_context::optional_string query_cache::select_random_term_for_dictionary(
    const std::string &dictionary_name) const {
  const auto [first, last] = m_dict_cache.equal_range(dictionary_name);

  if (first == last) {
    return std::nullopt;
  }

  // Seeded once per thread on first use.
  thread_local std::mt19937_64 engine{std::random_device{}()};

  // term_count >= 1 here, so the closed interval [0, term_count - 1] is
  // always valid for the distribution.
  const auto term_count = std::distance(first, last);
  using offset_type = std::remove_const_t<decltype(term_count)>;
  std::uniform_int_distribution<offset_type> pick_offset{0, term_count - 1};

  const auto chosen = std::next(first, pick_offset(engine));
  return sql_context::optional_string{std::in_place, chosen->second};
}

bool query_cache::delete_for_dictionary(const std::string &dictionary_name) {
masking_functions::sql_context sql_ctx{global_command_services::instance()};
auto query =
global_query_builder::instance().delete_for_dictionary(dictionary_name);

if (!sql_ctx.execute(query)) {
return false;
}

m_dict_cache.erase(dictionary_name);
return true;
}

// Removes a single term from a dictionary.
//
// Executes the DELETE statement for the (dictionary_name, term) pair first
// and returns false without touching the cache when it fails. On success
// every cached entry of the dictionary whose term equals 'term' is erased:
// the SQL statement removes all matching rows, so erasing only the first
// cached match could leave a stale entry behind if the cache ever holds the
// same pair more than once.
//
// NOTE(review): both arguments are handed to the query builder as given;
// confirm that they are escaped before being embedded into the statement.
bool query_cache::delete_for_dictionary_and_term(
    const std::string &dictionary_name, const std::string &term) {
  masking_functions::sql_context sql_ctx{global_command_services::instance()};
  auto query = global_query_builder::instance().delete_for_dictionary_and_term(
      dictionary_name, term);

  if (!sql_ctx.execute(query)) {
    return false;
  }

  // Erasing from a multimap invalidates only the erased iterator, so 'last'
  // stays valid for the whole loop.
  auto [it, last] = m_dict_cache.equal_range(dictionary_name);
  while (it != last) {
    if (it->second == term) {
      it = m_dict_cache.erase(it);
    } else {
      ++it;
    }
  }

  return true;
}

// Adds a term to a dictionary.
//
// Executes the INSERT IGNORE statement for the (dictionary_name, term) pair
// first and returns false without touching the cache when it fails. On
// success the pair is recorded in the cache unless an identical pair is
// already cached: INSERT IGNORE reports success even when the row was
// skipped as a duplicate, and caching the pair again would make the cache
// grow on every repeated call and over-represent that term in random
// selection.
//
// NOTE(review): this presumes the table does not store the same
// (dictionary, term) pair twice - confirm against the table definition.
// NOTE(review): both arguments are handed to the query builder as given;
// confirm that they are escaped before being embedded into the statement.
bool query_cache::insert_ignore_record(const std::string &dictionary_name,
                                       const std::string &term) {
  masking_functions::sql_context sql_ctx{global_command_services::instance()};
  auto query = global_query_builder::instance().insert_ignore_record(
      dictionary_name, term);

  if (!sql_ctx.execute(query)) {
    return false;
  }

  const auto [first, last] = m_dict_cache.equal_range(dictionary_name);
  const bool already_cached =
      std::any_of(first, last, [&term](const auto &entry) {
        return entry.second == term;
      });

  if (!already_cached) {
    m_dict_cache.emplace(dictionary_name, term);
  }
  return true;
}

} // namespace masking_functions
Loading

0 comments on commit 21035cd

Please sign in to comment.