File tree Expand file tree Collapse file tree 2 files changed +27
-15
lines changed Expand file tree Collapse file tree 2 files changed +27
-15
lines changed Original file line number Diff line number Diff line change 22#include " utils.h"
33
44R_xlen_t find_first (SEXP haystack, SEXP needle) {
5- SEXP needle_utf8 = PROTECT (str_as_utf8 (needle));
6- R_xlen_t n = XLENGTH (haystack);
5+ if (!string_is_ascii_or_utf8 (needle)) {
6+ needle = string_as_utf8 (needle);
7+ }
8+ PROTECT (needle);
9+
10+ const R_xlen_t n = XLENGTH (haystack);
711 R_xlen_t i_name = 0 ;
8- for (; i_name < n; i_name++) {
9- if (needle_utf8 == str_as_utf8 (STRING_ELT (haystack, i_name))) break ;
12+
13+ for (; i_name < n; ++i_name) {
14+ SEXP haystack_elt = STRING_ELT (haystack, i_name);
15+
16+ if (!string_is_ascii_or_utf8 (haystack_elt)) {
17+ // No need to `PROTECT()`, we do a pointer comparison
18+ // and then throw it away
19+ haystack_elt = string_as_utf8 (haystack_elt);
20+ }
21+
22+ if (needle == haystack_elt) {
23+ break ;
24+ }
1025 }
26+
1127 UNPROTECT (1 );
1228 return i_name;
1329}
Original file line number Diff line number Diff line change 66#include <Rinternals.h>
77#include <Rversion.h>
88
9- static inline
10- bool str_is_utf8 (SEXP x ) {
9+ // String encoding normalization
10+ // From https://github.com/r-lib/vctrs/pull/2085
11+ static inline bool string_is_ascii_or_utf8 (SEXP x ) {
1112#if (R_VERSION >= R_Version (4 , 5 , 0 ))
12- return Rf_charIsUTF8 ( x );
13+ return Rf_charIsASCII ( x ) || ( Rf_getCharCE ( x ) == CE_UTF8 ) || ( x == NA_STRING );
1314#else
1415 const int mask_ascii = 8 ;
1516 const int mask_utf8 = 64 ;
1617 const int levels = LEVELS (x );
17- return (levels & mask_ascii ) || (levels & mask_utf8 );
18+ return (levels & mask_ascii ) || (levels & mask_utf8 ) || ( x == NA_STRING ) ;
1819#endif
1920}
2021
21- static inline
22- SEXP str_as_utf8 (SEXP x ) {
23- if (str_is_utf8 (x )) {
24- return x ;
25- } else {
26- return Rf_mkCharCE (Rf_translateCharUTF8 (x ), CE_UTF8 );
27- }
22+ static inline SEXP string_as_utf8 (SEXP x ) {
23+ return Rf_mkCharCE (Rf_translateCharUTF8 (x ), CE_UTF8 );
2824}
2925
3026#endif
You can’t perform that action at this time.
0 commit comments