From 72b232acc67017a9578c93d46f36d91b83a1a053 Mon Sep 17 00:00:00 2001 From: Iain Lane Date: Fri, 4 Jun 2021 13:30:27 +0100 Subject: [PATCH 1/2] utils: Don't strip modifiers when stripping encoding We have some locales which are dupes if we do this, for example ca_ES.UTF-8 and ca_ES.UTF-8@valencia. The latter of these should become ca_ES@valencia in the output. That is what `locale -a` shows. Previously `as_locale_strip_encoding ()` modified the passed-in string in place. However, in the one place where don't `g_strdup ()` the string before passing to this function, it is the key in a `GHashTable`. We can't do this, and only get away with it because the hash table isn't touched after this call. Fix the function to instead return a newly allocated string, and drop the `g_strdup` calls from the other call sites. Add a small test for this function too. https://github.com/ximion/appstream-generator/issues/92 --- src/as-context.c | 2 +- src/as-utils-private.h | 5 ++++- src/as-utils.c | 10 +++------- src/as-yaml.c | 6 ++++-- tests/test-misc.c | 18 ++++++++++++++++++ 5 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/as-context.c b/src/as-context.c index cade065d3..9ce9b83f4 100644 --- a/src/as-context.c +++ b/src/as-context.c @@ -513,7 +513,7 @@ as_context_localized_ht_set (AsContext *ctx, GHashTable *lht, const gchar *value if (selected_locale == NULL) selected_locale = "C"; - locale_noenc = as_locale_strip_encoding (g_strdup (selected_locale)); + locale_noenc = as_locale_strip_encoding (selected_locale); g_hash_table_insert (lht, g_ref_string_new_intern (locale_noenc), g_strdup (value)); diff --git a/src/as-utils-private.h b/src/as-utils-private.h index 110e006d7..5dbeb2d35 100644 --- a/src/as-utils-private.h +++ b/src/as-utils-private.h @@ -97,7 +97,10 @@ AS_INTERNAL_VISIBLE gboolean as_copy_file (const gchar *source, const gchar *destination, GError **error); gboolean as_is_cruft_locale (const gchar *locale); -gchar *as_locale_strip_encoding (gchar *locale); + +AS_INTERNAL_VISIBLE +gchar *as_locale_strip_encoding (const gchar *locale); + gchar *as_utils_locale_to_language (const gchar *locale); gchar *as_get_current_arch (void); diff --git a/src/as-utils.c b/src/as-utils.c index 66f8a8222..4ac6050a5 100644 --- a/src/as-utils.c +++ b/src/as-utils.c @@ -776,16 +776,12 @@ as_is_cruft_locale (const gchar *locale) * as_locale_strip_encoding: * * Remove the encoding from a locale string. - * The function modifies the string directly. + * The function returns a newly allocated string. */ gchar* -as_locale_strip_encoding (gchar *locale) +as_locale_strip_encoding (const gchar *locale) { - gchar *tmp; - tmp = g_strstr_len (locale, -1, ".UTF-8"); - if (tmp != NULL) - *tmp = '\0'; - return locale; + return as_str_replace (locale, ".UTF-8", ""); } /** diff --git a/src/as-yaml.c b/src/as-yaml.c index 701429975..170d80151 100644 --- a/src/as-yaml.c +++ b/src/as-yaml.c @@ -509,7 +509,7 @@ as_yaml_set_localized_table (AsContext *ctx, GNode *node, GHashTable *l10n_table for (GNode *n = node->children; n != NULL; n = n->next) { const gchar *locale = as_yaml_get_node_locale (ctx, n); if (locale != NULL) { - g_autofree gchar *locale_noenc = as_locale_strip_encoding (g_strdup (locale)); + g_autofree gchar *locale_noenc = as_locale_strip_encoding (locale); g_hash_table_insert (l10n_table, g_ref_string_new_intern (locale_noenc), g_strdup (as_yaml_node_get_value (n))); @@ -641,6 +641,7 @@ as_yaml_emit_sequence_from_str_array (yaml_emitter_t *emitter, const gchar *key, static void as_yaml_localized_list_helper (gchar *key, gchar **strv, yaml_emitter_t *emitter) { + g_autofree gchar *locale_noenc = NULL; guint i; if (strv == NULL) return; @@ -649,7 +650,8 @@ as_yaml_localized_list_helper (gchar *key, gchar **strv, yaml_emitter_t *emitter if (as_is_cruft_locale (key)) return; - as_yaml_emit_scalar (emitter, as_locale_strip_encoding (key)); + locale_noenc = as_locale_strip_encoding (key); + as_yaml_emit_scalar (emitter, locale_noenc); as_yaml_sequence_start (emitter); for (i = 0; strv[i] != NULL; i++) { as_yaml_emit_scalar (emitter, strv[i]); diff --git a/tests/test-misc.c b/tests/test-misc.c index 4e9bca8de..0d7bdce50 100644 --- a/tests/test-misc.c +++ b/tests/test-misc.c @@ -21,6 +21,7 @@ #include #include "appstream.h" #include "as-news-convert.h" +#include "as-utils-private.h" #include "as-test-utils.h" @@ -192,6 +193,22 @@ test_readwrite_text_news () g_free (tmp); } +static void +test_locale_strip_encoding () +{ + g_autofree gchar *c = NULL; + g_autofree gchar *cutf8 = NULL; + g_autofree gchar *cutf8valencia = NULL; + + c = as_locale_strip_encoding ("C"); + cutf8 = as_locale_strip_encoding ("C.UTF-8"); + cutf8valencia = as_locale_strip_encoding ("C.UTF-8@valencia"); + + g_assert_cmpstr (c, ==, "C"); + g_assert_cmpstr (cutf8, ==, "C"); + g_assert_cmpstr (cutf8valencia, ==, "C@valencia"); +} + int main (int argc, char **argv) { @@ -214,6 +231,7 @@ main (int argc, char **argv) g_test_add_func ("/AppStream/Misc/YAMLNews", test_readwrite_yaml_news); g_test_add_func ("/AppStream/Misc/TextNews", test_readwrite_text_news); + g_test_add_func ("/AppStream/Misc/StripLocaleEncoding", test_locale_strip_encoding); ret = g_test_run (); g_free (datadir); From 8f8327bed13830d1ba62df63fec77fc0e3b5c17c Mon Sep 17 00:00:00 2001 From: Iain Lane Date: Mon, 7 Jun 2021 11:06:06 +0100 Subject: [PATCH 2/2] utils: Replace as_gstring_replace definition with upstream's Until we can depend on 2.68, it's probably best to standardise on one implementation - also, this one is much simpler. --- compose/asc-font.c | 4 +- src/as-news-convert.c | 2 +- src/as-spdx.c | 8 ++-- src/as-utils-private.h | 3 +- src/as-utils.c | 99 +++++++++++++++++++----------------------- src/as-utils.h | 3 +- tools/ascli-utils.c | 2 +- 7 files changed, 57 insertions(+), 64 deletions(-) diff --git a/compose/asc-font.c b/compose/asc-font.c index 257a9aa37..82b6a8a9b 100644 --- a/compose/asc-font.c +++ b/compose/asc-font.c @@ -423,12 +423,12 @@ asc_font_get_id (AscFont *font) return priv->id; tmp = g_utf8_strdown (asc_font_get_family (font), -1); - tmp_family = as_str_replace (tmp, " ", ""); + tmp_family = as_str_replace (tmp, " ", "", 0); as_strstripnl (tmp_family); g_free (tmp); tmp = g_utf8_strdown (asc_font_get_style (font), -1); - tmp_style = as_str_replace (tmp, " ", ""); + tmp_style = as_str_replace (tmp, " ", "", 0); as_strstripnl (tmp_style); g_free (tmp); diff --git a/src/as-news-convert.c b/src/as-news-convert.c index 444167b15..71d5a4789 100644 --- a/src/as-news-convert.c +++ b/src/as-news-convert.c @@ -650,7 +650,7 @@ as_news_text_to_releases (const gchar *data, GError **error) /* try to unsplit lines */ data_str = g_string_new (data); - as_gstring_replace (data_str, "\n ", " "); + as_gstring_replace (data_str, "\n ", " ", 0); /* break up into sections */ desc = g_string_new (""); diff --git a/src/as-spdx.c b/src/as-spdx.c index 12704d434..eb6b9619f 100644 --- a/src/as-spdx.c +++ b/src/as-spdx.c @@ -286,8 +286,8 @@ static GString* as_utils_spdx_license_3to2 (const gchar *license3) { GString *license2 = g_string_new (license3); - as_gstring_replace (license2, "-only", ""); - as_gstring_replace (license2, "-or-later", "+"); + as_gstring_replace (license2, "-only", "", 1); + as_gstring_replace (license2, "-or-later", "+", 1); return license2; } @@ -302,8 +302,8 @@ static GString* as_utils_spdx_license_2to3 (const gchar *license2) { GString *license3 = g_string_new (license2); - as_gstring_replace (license3, ".0+", ".0-or-later"); - as_gstring_replace (license3, ".1+", ".1-or-later"); + as_gstring_replace (license3, ".0+", ".0-or-later", 1); + as_gstring_replace (license3, ".1+", ".1-or-later", 1); return license3; } diff --git a/src/as-utils-private.h b/src/as-utils-private.h index 5dbeb2d35..4d0d5ae9a 100644 --- a/src/as-utils-private.h +++ b/src/as-utils-private.h @@ -82,7 +82,8 @@ gboolean as_utils_is_writable (const gchar *path); AS_INTERNAL_VISIBLE gchar *as_str_replace (const gchar *str, const gchar *old_str, - const gchar *new_str); + const gchar *new_str, + guint limit); gchar **as_ptr_array_to_strv (GPtrArray *array); const gchar *as_ptr_array_find_string (GPtrArray *array, diff --git a/src/as-utils.c b/src/as-utils.c index 4ac6050a5..28d955a80 100644 --- a/src/as-utils.c +++ b/src/as-utils.c @@ -593,66 +593,55 @@ as_ptr_array_to_strv (GPtrArray *array) } /** - * as_gstring_replace: - * @string: The #GString to operate on - * @search: The text to search for - * @replace: The text to use for substitutions - * - * Performs multiple search and replace operations on the given string. - * - * Returns: the number of replacements done, or 0 if @search is not found. + * g_string_replace: + * @string: a #GString + * @find: the string to find in @string + * @replace: the string to insert in place of @find + * @limit: the maximum instances of @find to replace with @replace, or `0` for + * no limit + * + * Replaces the string @find with the string @replace in a #GString up to + * @limit times. If the number of instances of @find in the #GString is + * less than @limit, all instances are replaced. If @limit is `0`, + * all instances of @find are replaced. + * + * Returns: the number of find and replace operations performed. **/ guint -as_gstring_replace (GString *string, const gchar *search, const gchar *replace) +as_gstring_replace (GString *string, const gchar *find, const gchar *replace, guint limit) { - gchar *tmp; - guint count = 0; - gsize search_idx = 0; - gsize replace_len; - gsize search_len; +#if GLIB_CHECK_VERSION(2,68,0) + return g_string_replace (string, find, replace, limit); +#else + /* note: This is a direct copy from GLib upstream (with whitespace + * fixed spaces to tabs and with style fixed). Once we can depend on + * GLib 2.68, this copy should be dropped and g_string_replace() used + * instead. + * + * GLib is licensed under the LGPL-2.1+. + */ + gsize f_len, r_len, pos; + gchar *cur, *next; + guint n = 0; g_return_val_if_fail (string != NULL, 0); - g_return_val_if_fail (search != NULL, 0); + g_return_val_if_fail (find != NULL, 0); g_return_val_if_fail (replace != NULL, 0); - /* nothing to do */ - if (string->len == 0) - return 0; - - search_len = strlen (search); - replace_len = strlen (replace); + f_len = strlen (find); + r_len = strlen (replace); + cur = string->str; - do { - tmp = g_strstr_len (string->str + search_idx, -1, search); - if (tmp == NULL) - break; - - /* advance the counter in case @replace contains @search */ - search_idx = (gsize) (tmp - string->str); - - /* reallocate the string if required */ - if (search_len > replace_len) { - g_string_erase (string, - (gssize) search_idx, - (gssize) (search_len - replace_len)); - memcpy (tmp, replace, replace_len); - } else if (search_len < replace_len) { - g_string_insert_len (string, - (gssize) search_idx, - replace, - (gssize) (replace_len - search_len)); - /* we have to treat this specially as it could have - * been reallocated when the insertion happened */ - memcpy (string->str + search_idx, replace, replace_len); - } else { - /* just memcmp in the new string */ - memcpy (tmp, replace, replace_len); - } - search_idx += replace_len; - count++; - } while (TRUE); + while ((next = strstr (cur, find)) != NULL) { + pos = next - string->str; + g_string_erase (string, pos, f_len); + g_string_insert (string, pos, replace); + cur = string->str + pos + r_len; + n++; + } - return count; + return n; +#endif /* !GLIB_CHECK_VERSION(2,68.0) */ } /** @@ -660,18 +649,20 @@ as_gstring_replace (GString *string, const gchar *search, const gchar *replace) * @str: The string to operate on * @old_str: The old value to replace. * @new_str: The new value to replace @old_str with. + * @limit: the maximum instances of @find to replace with @new_str, or `0` for + * no limit * * Performs search & replace on the given string. * * Returns: A new string with the characters replaced. */ gchar* -as_str_replace (const gchar *str, const gchar *old_str, const gchar *new_str) +as_str_replace (const gchar *str, const gchar *old_str, const gchar *new_str, guint limit) { GString *gstr; gstr = g_string_new (str); - as_gstring_replace (gstr, old_str, new_str); + as_gstring_replace (gstr, old_str, new_str, limit); return g_string_free (gstr, FALSE); } @@ -781,7 +772,7 @@ as_is_cruft_locale (const gchar *locale) gchar* as_locale_strip_encoding (const gchar *locale) { - return as_str_replace (locale, ".UTF-8", ""); + return as_str_replace (locale, ".UTF-8", "", 1); } /** diff --git a/src/as-utils.h b/src/as-utils.h index add0b5cb6..c21fc0ed2 100644 --- a/src/as-utils.h +++ b/src/as-utils.h @@ -118,7 +118,8 @@ guint as_utils_data_id_hash (const gchar *data_id); guint as_gstring_replace (GString *string, const gchar *search, - const gchar *replace); + const gchar *replace, + guint limit); gboolean as_utils_is_platform_triplet (const gchar *triplet); diff --git a/tools/ascli-utils.c b/tools/ascli-utils.c index aa7e3af00..c94d96daa 100644 --- a/tools/ascli-utils.c +++ b/tools/ascli-utils.c @@ -56,7 +56,7 @@ ascli_format_long_output (const gchar *str, guint line_length, guint indent_leve if (indent_level > 0) { g_autofree gchar *spacing = g_strnfill (indent_level, ' '); g_autofree gchar *spacing_nl = g_strconcat ("\n", spacing, NULL); - as_gstring_replace (res, "\n", spacing_nl); + as_gstring_replace (res, "\n", spacing_nl, 0); g_string_prepend (res, spacing); }