Skip to content

Commit 11e633b

Browse files
authored
Use finalized version of string translation helpers (#7744)
* Use finalized version of string translation helpers
* Use exact version of vctrs utilities
1 parent 36539cc commit 11e633b

File tree

2 files changed

+27
-15
lines changed

2 files changed

+27
-15
lines changed

src/mask.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,28 @@
22
#include "utils.h"
33

44
R_xlen_t find_first(SEXP haystack, SEXP needle) {
5-
SEXP needle_utf8 = PROTECT(str_as_utf8(needle));
6-
R_xlen_t n = XLENGTH(haystack);
5+
if (!string_is_ascii_or_utf8(needle)) {
6+
needle = string_as_utf8(needle);
7+
}
8+
PROTECT(needle);
9+
10+
const R_xlen_t n = XLENGTH(haystack);
711
R_xlen_t i_name = 0;
8-
for (; i_name < n; i_name++) {
9-
if (needle_utf8 == str_as_utf8(STRING_ELT(haystack, i_name))) break;
12+
13+
for (; i_name < n; ++i_name) {
14+
SEXP haystack_elt = STRING_ELT(haystack, i_name);
15+
16+
if (!string_is_ascii_or_utf8(haystack_elt)) {
17+
// No need to `PROTECT()`, we do a pointer comparison
18+
// and then throw it away
19+
haystack_elt = string_as_utf8(haystack_elt);
20+
}
21+
22+
if (needle == haystack_elt) {
23+
break;
24+
}
1025
}
26+
1127
UNPROTECT(1);
1228
return i_name;
1329
}

src/utils.h

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,21 @@
66
#include <Rinternals.h>
77
#include <Rversion.h>
88

9-
static inline
10-
bool str_is_utf8(SEXP x) {
9+
// String encoding normalization
10+
// From https://github.com/r-lib/vctrs/pull/2085
11+
static inline bool string_is_ascii_or_utf8(SEXP x) {
1112
#if (R_VERSION >= R_Version(4, 5, 0))
12-
return Rf_charIsUTF8(x);
13+
return Rf_charIsASCII(x) || (Rf_getCharCE(x) == CE_UTF8) || (x == NA_STRING);
1314
#else
1415
const int mask_ascii = 8;
1516
const int mask_utf8 = 64;
1617
const int levels = LEVELS(x);
17-
return (levels & mask_ascii) || (levels & mask_utf8);
18+
return (levels & mask_ascii) || (levels & mask_utf8) || (x == NA_STRING);
1819
#endif
1920
}
2021

21-
static inline
22-
SEXP str_as_utf8(SEXP x) {
23-
if (str_is_utf8(x)) {
24-
return x;
25-
} else {
26-
return Rf_mkCharCE(Rf_translateCharUTF8(x), CE_UTF8);
27-
}
22+
static inline SEXP string_as_utf8(SEXP x) {
23+
return Rf_mkCharCE(Rf_translateCharUTF8(x), CE_UTF8);
2824
}
2925

3026
#endif

0 commit comments

Comments
 (0)