diff --git a/NEWS b/NEWS index c3c814479..aacaaf160 100644 --- a/NEWS +++ b/NEWS @@ -3,7 +3,7 @@ ## 1.6.2.9xxx (to-be >=1.6.3) (2021-xx-yy) -* TODO ... [NEW FEATURE] #420: `stri_sprintf` (alias: `stri_string_format`) +* [NEW FEATURE] #420: `stri_sprintf` (alias: `stri_string_format`) is a Unicode-aware replacement for and enhancement of the base `sprintf`: it adds a customised handling of `NA`s (on demand), computing field size based on code point width, @@ -12,12 +12,12 @@ Moreover, `stri_printf` can be used to display formatted strings conveniently. -* [BACKWARD INCOMPATIBILITY] `%s$%` and `%stri$%` now use `stri_sprintf` - instead of `base::sprintf`. - * TODO ... [NEW FEATURE] #434: `stri_datetime_format` and `stri_datetime_parse` is now also vectorised with respect to the `format` argument. +* [BACKWARD INCOMPATIBILITY] `%s$%` and `%stri$%` now use `stri_sprintf` + instead of `base::sprintf`. + * [INTERNAL] `stri_prepare_arg*`s have been refactored, buffer overruns in the exception handling subsystem are now avoided. diff --git a/R/pad.R b/R/pad.R index 1dbdfcc15..34bfcb988 100644 --- a/R/pad.R +++ b/R/pad.R @@ -61,7 +61,7 @@ #' @param width integer vector giving minimal output string lengths #' @param side [\code{stri_pad} only] single character string; #' sides on which padding character is added -#' (\code{left}, \code{right}, or \code{both}) +#' (\code{left} (default), \code{right}, or \code{both}) #' @param pad character vector giving padding code points #' @param use_length single logical value; should the number of code #' points be used instead of the total code point width @@ -69,7 +69,6 @@ #' #' @return These functions return a character vector. 
#' -#' @rdname stri_pad #' @examples #' stri_pad_left('stringi', 10, pad='#') #' stri_pad_both('stringi', 8:12, pad='*') @@ -79,6 +78,9 @@ #' cat(stri_pad_both(c('\ud6c8\ubbfc\uc815\uc74c', # takes width into account #' stri_trans_nfkd('\ud6c8\ubbfc\uc815\uc74c'), 'abcd'), #' width=10), sep='\n') +#' +#' @family length +#' @rdname stri_pad #' @export stri_pad_both <- function(str, width = floor(0.9 * getOption("width")), pad = " ", use_length = FALSE) diff --git a/R/sprintf.R b/R/sprintf.R index da7d94f62..6c7a15c74 100644 --- a/R/sprintf.R +++ b/R/sprintf.R @@ -35,45 +35,56 @@ #' Format Strings #' #' @description -#' A Unicode-aware replacement for and enhancement of +#' \code{stri_sprintf} (synonym: \code{stri_string_format}) +#' is a Unicode-aware replacement for and enhancement of #' the built-in \code{\link[base]{sprintf}} function. #' Moreover, \code{stri_printf} prints formatted strings. #' #' @details #' Vectorized over \code{format} and all vectors passed via \code{...}. #' -#' \code{stri_string_format} is a synonym for \code{stri_sprintf}. -#' -#' Note that \code{stri_printf} treats missing values in \code{...} -#' as \code{"NA"} strings by default. -#' -#' Note that Unicode code points may have various widths when -#' printed on the console and that, by default, the function takes that -#' into account. By changing the state of the \code{use_length} -#' argument, this function act as if each code point was of width 1. -#' -#' For \code{\%d} and \code{\%f} formats, factors are treated as integer -#' vectors (underlying codes) and so are date and time objects, etc. +#' Unicode code points may have various widths when +#' printed on the console (compare \code{\link{stri_width}}). +#' These functions, by default (see the \code{use_length} argument), take this +#' into account. #' #' This function is not locale sensitive. For instance, numbers are -#' always formatted in the "POSIX" style, e.g., \code{-123456.789}. 
+#' always formatted in the "POSIX" style, e.g., \code{-123456.789} +#' (no thousands separator, dot as a fractional separator). #' Such a feature might be added at a later date, though. #' #' All arguments passed via \code{...} are evaluated. If some of them #' are unused, a warning is generated. Too few arguments result in an error. #' +#' Note that \code{stri_printf} treats missing values in \code{...} +#' as strings \code{"NA"} by default. +#' +#' All format specifiers supported by \code{\link[base]{sprintf}} are +#' also available here. For the formatting of integers and floating-point +#' values, currently the system \code{std::snprintf()} is called, but +#' this may change in the future. Format specifiers are normalized +#' and necessary sanity checks are performed. +#' +#' Supported conversion specifiers: \code{dioxX} (integers), +#' \code{feEgGaA} (floats), and \code{s} (character strings). +#' Supported flags: \code{-} (left-align), +#' \code{+} (force output sign or blank when \code{NaN} or \code{NA}; numeric only), +#' \code{<space>} (output minus or space for a sign; numeric only), +#' \code{0} (pad with 0s; numeric only), +#' \code{#} (alternative output of some numerics). +#' #' -#' @param format character vector of format strings +#' @param format character vector of format strings, see \code{\link[base]{sprintf}} for syntax #' @param ... 
vectors (coercible to integer, real, or character) #' @param na_string single string to represent missing values; #' if \code{NA}, missing values in \code{...} #' result in the corresponding outputs be missing too; #' use \code{"NA"} for compatibility with base R -#' @param inf_string single string to represent the (unsigned) infinity -#' @param na_string single string to represent the not-a-number +#' @param inf_string single string to represent the (unsigned) infinity (\code{NA} allowed) +#' @param nan_string single string to represent the not-a-number (\code{NA} allowed) #' @param use_length single logical value; should the number of code #' points be used when applying modifiers such as \code{\%20s} -#' instead of the total code point width (see \code{\link{stri_width}})? +#' instead of the total code point width? #' @param file see \code{\link[base]{cat}} #' @param sep see \code{\link[base]{cat}} #' @param append see \code{\link[base]{cat}} @@ -86,11 +97,23 @@ #' The other functions return a character vector. #' #' +#' @references +#' \code{printf} in \code{glibc}, +#' \url{https://man.archlinux.org/man/printf.3} +#' +#' \code{printf} format strings -- Wikipedia, +#' \url{https://en.wikipedia.org/wiki/Printf_format_string} +#' #' @examples -#' #... 
#' stri_printf("%4s=%.3f", c("e", "e\u00b2", "\u03c0", "\u03c0\u00b2"), #' c(exp(1), exp(2), pi, pi^2)) #' +#' x <- c("xxabcd", "xx\u0105\u0106\u0107\u0108", +#' "\u200b\u200b\u200b\u200b\U0001F3F4\U000E0067\U000E0062\U000E0073\U000E0063\U000E0074\U000E007Fabcd") +#' stri_printf("[%10s]", x) # minimum width = 10 +#' stri_printf("[%-10.3s]", x) # output of max width = 3, but pad to width of 10 +#' stri_printf("[%10s]", x, use_length=TRUE) # minimum number of Unicode code points = 10 +#' #' # vectorization wrt all arguments: #' p <- runif(10) #' stri_sprintf(ifelse(p > 0.5, "P(Y=1)=%1$.2f", "P(Y=0)=%2$.2f"), p, 1-p) @@ -103,7 +126,17 @@ #' stri_printf("%+10.3f", c(-Inf, -0, 0, Inf, NaN, NA_real_), #' na_string="", nan_string="\U0001F4A9", inf_string="\u221E") #' +#' stri_sprintf("UNIX time %1$f is %1$s.", Sys.time()) +#' +#' # the following do not work in sprintf() +#' stri_sprintf("%1$#- *2$.*3$f", 1.23456, 10, 3) # two asterisks +#' stri_sprintf(c("%s", "%f"), pi) # re-coercion needed +#' stri_sprintf("%1$s is %1$f UNIX time.", Sys.time()) # re-coercion needed +#' stri_sprintf(c("%d", "%s"), factor(11:12)) # re-coercion needed +#' stri_sprintf(c("%s", "%d"), factor(11:12)) # re-coercion needed +#' #' @rdname stri_sprintf +#' @family length #' @export stri_sprintf <- function( format, ..., @@ -123,6 +156,7 @@ stri_sprintf <- function( stri_string_format <- stri_sprintf +#' @rdname stri_sprintf #' @export stri_printf <- function( format, ..., @@ -140,14 +174,12 @@ stri_printf <- function( cat(str, file=file, sep=sep, append=append) } -### TODO: update - #' @title -#' C-Style Formatting with sprintf as a Binary Operator TODO: call stri_sprintf +#' C-Style Formatting with \code{\link{stri_sprintf}} as a Binary Operator #' #' @description -#' Provides access to base R's \code{\link[base]{sprintf}} in form of a binary +#' Provides access to \code{\link{stri_sprintf}} in form of a binary #' operator in a way similar to Python's \code{\%} overloaded for strings. 
#' #' @@ -158,12 +190,9 @@ stri_printf <- function( #' \code{e1 \%s$\% atomic_vector} is equivalent to #' \code{e1 \%s$\% list(atomic_vector)}. #' -#' Note that \code{\link[base]{sprintf}} takes field width in bytes, -#' not Unicode code points. See Examples for a workaround. -#' #' -#' @param e1 format strings, see \code{\link[base]{sprintf}} for syntax -#' @param e2 a list of atomic vectors to be passed to \code{\link[base]{sprintf}} +#' @param e1 format strings, see \code{\link{stri_sprintf}} for syntax +#' @param e2 a list of atomic vectors to be passed to \code{\link{stri_sprintf}} #' or a single atomic vector #' #' @return @@ -178,13 +207,12 @@ stri_printf <- function( #' "%s='%d'" %s$% list(c("a", "b", "c"), 1) #' "%s='%d'" %s$% list(c("a", "b", "c"), 1:3) #' -#' # sprintf field width: #' x <- c("abcd", "\u00DF\u00B5\U0001F970", "abcdef") -#' cat(sprintf("%s%6s%s", "-", x, "-"), sep="\n") -#' cat(sprintf("%s%s%s", "-", stringi::stri_pad(x, 6), "-"), sep="\n") +#' cat("[%6s]" %s$% x, sep="\n") # width used, not the number of bytes #' #' @rdname operator_dollar #' @aliases operator_dollar oper_dollar +#' @family length #' #' @usage #' e1 \%s$\% e2 diff --git a/devel/sphinx/news.md b/devel/sphinx/news.md index c3c814479..aacaaf160 100644 --- a/devel/sphinx/news.md +++ b/devel/sphinx/news.md @@ -3,7 +3,7 @@ ## 1.6.2.9xxx (to-be >=1.6.3) (2021-xx-yy) -* TODO ... [NEW FEATURE] #420: `stri_sprintf` (alias: `stri_string_format`) +* [NEW FEATURE] #420: `stri_sprintf` (alias: `stri_string_format`) is a Unicode-aware replacement for and enhancement of the base `sprintf`: it adds a customised handling of `NA`s (on demand), computing field size based on code point width, @@ -12,12 +12,12 @@ Moreover, `stri_printf` can be used to display formatted strings conveniently. -* [BACKWARD INCOMPATIBILITY] `%s$%` and `%stri$%` now use `stri_sprintf` - instead of `base::sprintf`. - * TODO ... 
[NEW FEATURE] #434: `stri_datetime_format` and `stri_datetime_parse` is now also vectorised with respect to the `format` argument. +* [BACKWARD INCOMPATIBILITY] `%s$%` and `%stri$%` now use `stri_sprintf` + instead of `base::sprintf`. + * [INTERNAL] `stri_prepare_arg*`s have been refactored, buffer overruns in the exception handling subsystem are now avoided. diff --git a/devel/sphinx/rapi/operator_dollar.md b/devel/sphinx/rapi/operator_dollar.md index add53ec66..fccbc6b9e 100644 --- a/devel/sphinx/rapi/operator_dollar.md +++ b/devel/sphinx/rapi/operator_dollar.md @@ -1,8 +1,8 @@ -# operator\_dollar: C-Style Formatting with sprintf as a Binary Operator TODO: call stri\_sprintf +# operator\_dollar: C-Style Formatting with [`stri_sprintf`](https://stringi.gagolewski.com/rapi/stri_sprintf.html) as a Binary Operator ## Description -Provides access to base R\'s [`sprintf`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/sprintf.html) in form of a binary operator in a way similar to Python\'s `%` overloaded for strings. +Provides access to [`stri_sprintf`](https://stringi.gagolewski.com/rapi/stri_sprintf.html) in form of a binary operator in a way similar to Python\'s `%` overloaded for strings. 
## Usage @@ -14,10 +14,10 @@ e1 %stri$% e2 ## Arguments -| | | -|------|--------------------------------------------------------------------------------------------------------------------------------------------------------| -| `e1` | format strings, see [`sprintf`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/sprintf.html) for syntax | -| `e2` | a list of atomic vectors to be passed to [`sprintf`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/sprintf.html) or a single atomic vector | +| | | +|------|--------------------------------------------------------------------------------------------------------------------------------------------| +| `e1` | format strings, see [`stri_sprintf`](https://stringi.gagolewski.com/rapi/stri_sprintf.html) for syntax | +| `e2` | a list of atomic vectors to be passed to [`stri_sprintf`](https://stringi.gagolewski.com/rapi/stri_sprintf.html) or a single atomic vector | ## Details @@ -25,8 +25,6 @@ Vectorized over `e1` and `e2`. `e1 %s$% atomic_vector` is equivalent to `e1 %s$% list(atomic_vector)`. -Note that [`sprintf`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/sprintf.html) takes field width in bytes, not Unicode code points. See Examples for a workaround. 
- ## Value Returns a character vector @@ -39,6 +37,8 @@ Returns a character vector The official online manual of stringi at +Other length: [`stri_isempty`](https://stringi.gagolewski.com/rapi/stri_isempty.html)(), [`stri_length`](https://stringi.gagolewski.com/rapi/stri_length.html)(), [`stri_numbytes`](https://stringi.gagolewski.com/rapi/stri_numbytes.html)(), [`stri_pad_both`](https://stringi.gagolewski.com/rapi/stri_pad_both.html)(), [`stri_sprintf`](https://stringi.gagolewski.com/rapi/stri_sprintf.html)(), [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html)() + ## Examples @@ -57,14 +57,9 @@ The official online manual of stringi at stringi at -Other length: [`stri_length`](https://stringi.gagolewski.com/rapi/stri_length.html)(), [`stri_numbytes`](https://stringi.gagolewski.com/rapi/stri_numbytes.html)(), [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html)() +Other length: [`%s$%`](https://stringi.gagolewski.com/rapi/%25s$%25.html)(), [`stri_length`](https://stringi.gagolewski.com/rapi/stri_length.html)(), [`stri_numbytes`](https://stringi.gagolewski.com/rapi/stri_numbytes.html)(), [`stri_pad_both`](https://stringi.gagolewski.com/rapi/stri_pad_both.html)(), [`stri_sprintf`](https://stringi.gagolewski.com/rapi/stri_sprintf.html)(), [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html)() ## Examples diff --git a/devel/sphinx/rapi/stri_length.md b/devel/sphinx/rapi/stri_length.md index a452b56cb..c768247e6 100644 --- a/devel/sphinx/rapi/stri_length.md +++ b/devel/sphinx/rapi/stri_length.md @@ -36,7 +36,7 @@ Returns an integer vector of the same length as `str`. 
The official online manual of stringi at -Other length: [`stri_isempty`](https://stringi.gagolewski.com/rapi/stri_isempty.html)(), [`stri_numbytes`](https://stringi.gagolewski.com/rapi/stri_numbytes.html)(), [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html)() +Other length: [`%s$%`](https://stringi.gagolewski.com/rapi/%25s$%25.html)(), [`stri_isempty`](https://stringi.gagolewski.com/rapi/stri_isempty.html)(), [`stri_numbytes`](https://stringi.gagolewski.com/rapi/stri_numbytes.html)(), [`stri_pad_both`](https://stringi.gagolewski.com/rapi/stri_pad_both.html)(), [`stri_sprintf`](https://stringi.gagolewski.com/rapi/stri_sprintf.html)(), [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html)() ## Examples diff --git a/devel/sphinx/rapi/stri_numbytes.md b/devel/sphinx/rapi/stri_numbytes.md index 0cfa08013..b33c8b233 100644 --- a/devel/sphinx/rapi/stri_numbytes.md +++ b/devel/sphinx/rapi/stri_numbytes.md @@ -40,7 +40,7 @@ Returns an integer vector of the same length as `str`. 
The official online manual of stringi at -Other length: [`stri_isempty`](https://stringi.gagolewski.com/rapi/stri_isempty.html)(), [`stri_length`](https://stringi.gagolewski.com/rapi/stri_length.html)(), [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html)() +Other length: [`%s$%`](https://stringi.gagolewski.com/rapi/%25s$%25.html)(), [`stri_isempty`](https://stringi.gagolewski.com/rapi/stri_isempty.html)(), [`stri_length`](https://stringi.gagolewski.com/rapi/stri_length.html)(), [`stri_pad_both`](https://stringi.gagolewski.com/rapi/stri_pad_both.html)(), [`stri_sprintf`](https://stringi.gagolewski.com/rapi/stri_sprintf.html)(), [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html)() ## Examples diff --git a/devel/sphinx/rapi/stri_pad.md b/devel/sphinx/rapi/stri_pad.md index 39eeb9f50..f33ab400a 100644 --- a/devel/sphinx/rapi/stri_pad.md +++ b/devel/sphinx/rapi/stri_pad.md @@ -45,7 +45,7 @@ stri_pad( | `width` | integer vector giving minimal output string lengths | | `pad` | character vector giving padding code points | | `use_length` | single logical value; should the number of code points be used instead of the total code point width (see [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html))? | -| `side` | \[`stri_pad` only\] single character string; sides on which padding character is added (`left`, `right`, or `both`) | +| `side` | \[`stri_pad` only\] single character string; sides on which padding character is added (`left` (default), `right`, or `both`) | ## Details @@ -69,6 +69,8 @@ These functions return a character vector. 
The official online manual of stringi at +Other length: [`%s$%`](https://stringi.gagolewski.com/rapi/%25s$%25.html)(), [`stri_isempty`](https://stringi.gagolewski.com/rapi/stri_isempty.html)(), [`stri_length`](https://stringi.gagolewski.com/rapi/stri_length.html)(), [`stri_numbytes`](https://stringi.gagolewski.com/rapi/stri_numbytes.html)(), [`stri_sprintf`](https://stringi.gagolewski.com/rapi/stri_sprintf.html)(), [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html)() + ## Examples diff --git a/devel/sphinx/rapi/stri_sprintf.md b/devel/sphinx/rapi/stri_sprintf.md index 91f997ebb..848e705fe 100644 --- a/devel/sphinx/rapi/stri_sprintf.md +++ b/devel/sphinx/rapi/stri_sprintf.md @@ -2,7 +2,7 @@ ## Description -A Unicode-aware replacement for and enhancement of the built-in [`sprintf`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/sprintf.html) function. Moreover, `stri_printf` prints formatted strings. +`stri_sprintf` (synonym: `stri_string_format`) is a Unicode-aware replacement for and enhancement of the built-in [`sprintf`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/sprintf.html) function. Moreover, `stri_printf` prints formatted strings. 
## Usage @@ -24,36 +24,49 @@ stri_string_format( nan_string = "NaN", use_length = FALSE ) + +stri_printf( + format, + ..., + file = "", + sep = "\n", + append = FALSE, + na_string = "NA", + inf_string = "Inf", + nan_string = "NaN", + use_length = FALSE +) ``` ## Arguments -| | | -|--------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `format` | character vector of format strings | -| `...` | vectors (coercible to integer, real, or character) | -| `na_string` | single string to represent the not-a-number | -| `inf_string` | single string to represent the (unsigned) infinity | -| `use_length` | single logical value; should the number of code points be used when applying modifiers such as `%20s` instead of the total code point width (see [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html))? | -| `file` | see [`cat`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/cat.html) | -| `sep` | see [`cat`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/cat.html) | -| `append` | see [`cat`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/cat.html) | +| | | +|--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `format` | character vector of format strings [`sprintf`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/sprintf.html) | +| `...` | vectors (coercible to integer, real, or character) | +| `na_string` | single string to represent missing values; if `NA`, missing values in `...` result in the corresponding outputs be missing too; use `"NA"` for compatibility with base R | +| `inf_string` | single string to represent the (unsigned) infinity (`NA` allowed) | +| `nan_string` | single string to represent the 
not-a-number (`NA` allowed) | +| `use_length` | single logical value; should the number of code points be used when applying modifiers such as `%20s` instead of the total code point width? | +| `file` | see [`cat`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/cat.html) | +| `sep` | see [`cat`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/cat.html) | +| `append` | see [`cat`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/cat.html) | ## Details Vectorized over `format` and all vectors passed via `...`. -`stri_string_format` is a synonym for `stri_sprintf`. +Unicode code points may have various widths when printed on the console (compare [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html)). These functions, by default (see the `use_length` argument), take this into account. -Note that `stri_printf` treats missing values in `...` as `"NA"` strings by default. +This function is not locale sensitive. For instance, numbers are always formatted in the \"POSIX\" style, e.g., `-123456.789` (no thousands separator, dot as a fractional separator). Such a feature might be added at a later date, though. -Note that Unicode code points may have various widths when printed on the console and that, by default, the function takes that into account. By changing the state of the `use_length` argument, this function act as if each code point was of width 1. +All arguments passed via `...` are evaluated. If some of them are unused, a warning is generated. Too few arguments result in an error. -For `%d` and `%f` formats, factors are treated as integer vectors (underlying codes) and so are date and time objects, etc. +Note that `stri_printf` treats missing values in `...` as strings `"NA"` by default. -This function is not locale sensitive. For instance, numbers are always formatted in the \"POSIX\" style, e.g., `-123456.789`. Such a feature might be added at a later date, though. 
+All format specifiers supported by [`sprintf`](https://stat.ethz.ch/R-manual/R-patched/library/base/html/sprintf.html) are also available here. For the formatting of integers and floating-point values, currently the system `std::snprintf()` is called, but this may change in the future. Format specifiers are normalized and necessary sanity checks are performed. -All arguments passed via `...` are evaluated. If some of them are unused, a warning is generated. Too few arguments result in an error. +Supported conversion specifiers: `dioxX` (integers), `feEgGaA` (floats), and `s` (character strings). Supported flags: `-` (left-align), `+` (force output sign or blank when `NaN` or `NA`; numeric only), `<space>` (output minus or space for a sign; numeric only), `0` (pad with 0s; numeric only), `#` (alternative output of some numerics). ## Value @@ -65,23 +78,44 @@ The other functions return a character vector. [Marek Gagolewski](https://www.gagolewski.com/) and other contributors +## References + +`printf` in `glibc`, https://man.archlinux.org/man/printf.3 + +`printf` format strings -- Wikipedia, https://en.wikipedia.org/wiki/Printf_format_string + ## See Also The official online manual of stringi at +Other length: [`%s$%`](https://stringi.gagolewski.com/rapi/%25s$%25.html)(), [`stri_isempty`](https://stringi.gagolewski.com/rapi/stri_isempty.html)(), [`stri_length`](https://stringi.gagolewski.com/rapi/stri_length.html)(), [`stri_numbytes`](https://stringi.gagolewski.com/rapi/stri_numbytes.html)(), [`stri_pad_both`](https://stringi.gagolewski.com/rapi/stri_pad_both.html)(), [`stri_width`](https://stringi.gagolewski.com/rapi/stri_width.html)() + ## Examples ```r -#... 
stri_printf("%4s=%.3f", c("e", "e\u00b2", "\u03c0", "\u03c0\u00b2"), c(exp(1), exp(2), pi, pi^2)) ## e=2.718 ## e²=7.389 ## π=3.142 ## π²=9.870 +x <- c("xxabcd", "xx\u0105\u0106\u0107\u0108", + "\u200b\u200b\u200b\u200b\U0001F3F4\U000E0067\U000E0062\U000E0073\U000E0063\U000E0074\U000E007Fabcd") +stri_printf("[%10s]", x) # minimum width = 10 +## [ xxabcd] +## [ xxąĆćĈ] +## [ ​​​​🏴󠁧󠁢󠁳󠁣󠁴󠁿abcd] +stri_printf("[%-10.3s]", x) # output of max width = 3, but pad to width of 10 +## [xxa ] +## [xxą ] +## [​​​​🏴󠁧󠁢󠁳󠁣󠁴󠁿a ] +stri_printf("[%10s]", x, use_length=TRUE) # minimum number Unicode of code points = 10 +## [ xxabcd] +## [ xxąĆćĈ] +## [​​​​🏴󠁧󠁢󠁳󠁣󠁴󠁿abcd] # vectorization wrt all arguments: p <- runif(10) stri_sprintf(ifelse(p > 0.5, "P(Y=1)=%1$.2f", "P(Y=0)=%2$.2f"), p, 1-p) @@ -100,4 +134,17 @@ stri_printf("%+10.3f", c(-Inf, -0, 0, Inf, NaN, NA_real_), ## +∞ ## 💩 ## +stri_sprintf("UNIX time %1$f is %1$s.", Sys.time()) +## [1] "UNIX time 1621824515.023827 is 2021-05-24 12:48:35." +# the following do not work in sprintf() +stri_sprintf("%1$#- *2$.*3$f", 1.23456, 10, 3) # two asterisks +## [1] " 1.235 " +stri_sprintf(c("%s", "%f"), pi) # re-coercion needed +## [1] "3.14159265358979" "3.141593" +stri_sprintf("%1$s is %1$f UNIX time.", Sys.time()) # re-coercion needed +## [1] "2021-05-24 12:48:35 is 1621824515.027764 UNIX time." +stri_sprintf(c("%d", "%s"), factor(11:12)) # re-coercion needed +## [1] "1" "12" +stri_sprintf(c("%s", "%d"), factor(11:12)) # re-coercion needed +## [1] "11" "2" ``` diff --git a/devel/sphinx/rapi/stri_width.md b/devel/sphinx/rapi/stri_width.md index 7dc077842..eb6ffd546 100644 --- a/devel/sphinx/rapi/stri_width.md +++ b/devel/sphinx/rapi/stri_width.md @@ -50,7 +50,7 @@ Returns an integer vector of the same length as `str`. 
The official online manual of stringi at -Other length: [`stri_isempty`](https://stringi.gagolewski.com/rapi/stri_isempty.html)(), [`stri_length`](https://stringi.gagolewski.com/rapi/stri_length.html)(), [`stri_numbytes`](https://stringi.gagolewski.com/rapi/stri_numbytes.html)() +Other length: [`%s$%`](https://stringi.gagolewski.com/rapi/%25s$%25.html)(), [`stri_isempty`](https://stringi.gagolewski.com/rapi/stri_isempty.html)(), [`stri_length`](https://stringi.gagolewski.com/rapi/stri_length.html)(), [`stri_numbytes`](https://stringi.gagolewski.com/rapi/stri_numbytes.html)(), [`stri_pad_both`](https://stringi.gagolewski.com/rapi/stri_pad_both.html)(), [`stri_sprintf`](https://stringi.gagolewski.com/rapi/stri_sprintf.html)() ## Examples diff --git a/devel/tinytest/test-sprintf.R b/devel/tinytest/test-sprintf.R index 895933c3e..c47080de5 100644 --- a/devel/tinytest/test-sprintf.R +++ b/devel/tinytest/test-sprintf.R @@ -96,6 +96,11 @@ expect_identical(stri_sprintf("%+ d", 123), sprintf("%+ d", 123) ) expect_identical(stri_sprintf("%.0f", c(0, 0.5, 1, 1.5, 2)), sprintf("%.0f", c(0, 0.5, 1, 1.5, 2))) expect_identical(stri_sprintf("%d", c(0, 0.5, 1, 1.5, 2)), sprintf("%d", as.integer(c(0, 0.5, 1, 1.5, 2)))) +x <- as.integer(c(-1000000, 0, -532, 6, -2, 54326430)) +expect_identical(stri_sprintf("%d", x), sprintf("%d", x)) +expect_identical(stri_sprintf("%0i", x), sprintf("%0i", x)) +expect_identical(stri_sprintf("%6i", x), sprintf("%6i", x)) +expect_identical(stri_sprintf("%+6.3i", x), sprintf("%+6.3i", x)) expect_warning(stri_sprintf("%s %1$s %s", "a", "b", "c", "d")) @@ -165,66 +170,75 @@ expect_equivalent(c(NA_character_, "%s", NA_character_) %s$% c("a", "a", "a"), c expect_equivalent(c(NA_character_, "%s") %s$% c("a", NA_character_, "a", NA_character_), c(NA_character_, NA_character_, NA_character_, NA_character_)) expect_equivalent(c(NA_character_, "%s") %s$% c(NA_character_, "a", NA_character_, "a"), c(NA_character_, "a", NA_character_, "a")) - - -' -TODO: add quirks 
to stringx - -# NAs not propagated correctly [this is a string formatting function, so there should be an option to treat NA as "NA" etc. though) -sprintf(NA, "this should yield NA") # error, but should be NA_character_ -sprintf(NA_character_, "this should yield NA") # "NA", but should be NA_character_ - -# not fully vectorised: -sprintf(c("%d", "%s"), factor(11:12)) # error, converts to int only and then fails -sprintf(c("%s", "%d"), factor(11:12)) # error, converts to character only and then fails -sprintf("%1$s_%1$d", factor(11:12)) # does not work either - -sprintf("%d%d", 1:3, 1:2) # error, but should be a warning about partial recycling - -sprintf("%s%d", character(0), 1:10) # ok, empty vector - -sprintf(c(x="%s", y="%s"), c(a=1, b=2)) # attributes not preserved - -# "prefix 0 for characters zero-pads on some platforms and is ignored on others." - -sprintf("%#010x", 123) -sprintf("%f", 123)# "123.000000" -sprintf("%#f", 123)# "123.000000" -sprintf("%#g", 123)# "123.000" -sprintf("%g", 123) # "123" -sprintf("%#015g", 123) # "00000000123.000" - -sprintf("%5.3d", 1) # " 001" -sprintf("%5.8d", 1) # "00000001" -sprintf("%5.8d", 123)# "00000123" -sprintf("%5.8d", 12345678) # "12345678" -sprintf("%5.8d", 123456789)# "123456789" - -sprintf("% 05d", c(-123, 123, 0)) # "-0123" " 0123" " 0000" -sprintf("%+05d", c(-123, 123, 0)) # "-0123" "+0123" "+0000" -sprintf("%+5d", c(-123, 123, 0)) # " -123" " +123" " +0" -sprintf("% 5d", c(-123, 123, 0)) # " -123" " 123" " 0" -sprintf("%- 5d", c(-123, 123, 0)) # "-123 " " 123 " " 0 " -sprintf("%-+5d", c(-123, 123, 0)) # "-123 " "+123 " "+0 " -sprintf("%-0+5d", c(-123, 123, 0)) # "-123 " "+123 " "+0 " -sprintf("%-0 5d", c(-123, 123, 0)) # "-123 " " 123 " " 0 " - - -sprintf("%08s", "abc") -sprintf("%-8s", "abc") -sprintf("%+8s", "abc") - -sprintf("%1$s %s %2$s %s", 1, 2) - - - -sprintf("%4$*3$s", 1, "a", 6, "b") -sprintf("%4$*3$.2f", 1, "a", 6, pi) -sprintf("%4$10.*3$f", 1, "a", 6, pi) -sprintf("%4$-*3$.2f", 1, "a", 6, pi) 
-sprintf("e with %1$2d digits = %2$.*1$g", 10, exp(1)) - -sprintf("%*1$d", 1:5) -sprintf("%1$*1$d", 1:5) -sprintf("%1$*d", 1:5) -' +expect_identical(stri_sprintf(NA), NA_character_) +expect_identical(stri_sprintf(NA_character_), NA_character_) + +expect_identical(stri_sprintf(c("%d", "%s"), factor(11:12)), c("1", "12")) +expect_identical(stri_sprintf(c("%s", "%d"), factor(11:12)), c("11", "2")) +expect_identical(stri_sprintf("%1$s_%1$d", factor(11:12)), c("11_1", "12_2")) +expect_identical(stri_sprintf("%1$d_%1$s", factor(11:12)), c("1_11", "2_12")) +expect_identical(stri_sprintf("%s", factor(11:12)), c("11", "12")) +expect_identical(stri_sprintf("%d", factor(11:12)), c("1", "2")) + + +expect_identical(stri_sprintf("%#010x", 123), sprintf("%#010x", 123)) +expect_identical(stri_sprintf("%f", 123), sprintf("%f", 123)) +expect_identical(stri_sprintf("%#f", 123), sprintf("%#f", 123)) +expect_identical(stri_sprintf("%#g", 123), sprintf("%#g", 123)) +expect_identical(stri_sprintf("%g", 123), sprintf("%g", 123)) +expect_identical(stri_sprintf("%#015g", 123), sprintf("%#015g", 123)) +expect_identical(stri_sprintf("%5.3d", 1), sprintf("%5.3d", 1)) +expect_identical(stri_sprintf("%5.8d", 1), sprintf("%5.8d", 1)) +expect_identical(stri_sprintf("%5.8d", 123), sprintf("%5.8d", 123)) +expect_identical(stri_sprintf("%5.8d", 12345678), sprintf("%5.8d", 12345678)) +expect_identical(stri_sprintf("%5.8d", 123456789), sprintf("%5.8d", 123456789)) +expect_identical(stri_sprintf("% 05d", c(-123, 123, 0)), sprintf("% 05d", c(-123, 123, 0))) +expect_identical(stri_sprintf("%+05d", c(-123, 123, 0)), sprintf("%+05d", c(-123, 123, 0))) +expect_identical(stri_sprintf("%+5d", c(-123, 123, 0)), sprintf("%+5d", c(-123, 123, 0))) +expect_identical(stri_sprintf("% 5d", c(-123, 123, 0)), sprintf("% 5d", c(-123, 123, 0))) +expect_identical(stri_sprintf("%- 5d", c(-123, 123, 0)), sprintf("%- 5d", c(-123, 123, 0))) +expect_identical(stri_sprintf("%-+5d", c(-123, 123, 0)), sprintf("%-+5d", c(-123, 123, 
0))) +expect_identical(stri_sprintf("%-0+5d", c(-123, 123, 0)), sprintf("%-0+5d", c(-123, 123, 0))) +expect_identical(stri_sprintf("%-0 5d", c(-123, 123, 0)), sprintf("%-0 5d", c(-123, 123, 0))) +expect_identical(stri_sprintf("%08s", "abc"), sprintf("%08s", "abc")) +expect_identical(stri_sprintf("%-8s", "abc"), sprintf("%-8s", "abc")) +expect_identical(stri_sprintf("%+8s", "abc"), sprintf("%+8s", "abc")) +expect_identical(stri_sprintf("%1$s %s %2$s %s", 1, 2), sprintf("%1$s %s %2$s %s", 1, 2)) +expect_identical(suppressWarnings(stri_sprintf("%4$*3$s", 1, "a", 6, "b")), suppressWarnings(sprintf("%4$*3$s", 1, "a", 6, "b"))) +expect_identical(suppressWarnings(stri_sprintf("%4$*3$.2f", 1, "a", 6, pi)), suppressWarnings(sprintf("%4$*3$.2f", 1, "a", 6, pi))) +expect_identical(suppressWarnings(stri_sprintf("%4$10.*3$f", 1, "a", 6, pi)), suppressWarnings(sprintf("%4$10.*3$f", 1, "a", 6, pi))) +expect_identical(suppressWarnings(stri_sprintf("%4$-*3$.2f", 1, "a", 6, pi)), suppressWarnings(sprintf("%4$-*3$.2f", 1, "a", 6, pi))) +expect_identical(stri_sprintf("e with %1$2d digits = %2$.*1$g", 10, exp(1)), sprintf("e with %1$2d digits = %2$.*1$g", 10, exp(1))) +expect_identical(stri_sprintf("%*1$d", 1:5), sprintf("%*1$d", 1:5)) +expect_identical(stri_sprintf("%1$*1$d", 1:5), sprintf("%1$*1$d", 1:5)) +expect_identical(stri_sprintf("%1$*d", 1:5), sprintf("%1$*d", 1:5)) + + +# the following are from help("sprintf"): + +expect_identical(stri_sprintf("%.0f%% said yes (out of a sample of size %.0f)", 66.666, 3), sprintf("%.0f%% said yes (out of a sample of size %.0f)", 66.666, 3)) +expect_identical(stri_sprintf("%f", pi), sprintf("%f", pi)) +expect_identical(stri_sprintf("%.3f", pi), sprintf("%.3f", pi)) +expect_identical(stri_sprintf("%1.0f", pi), sprintf("%1.0f", pi)) +expect_identical(stri_sprintf("%5.1f", pi), sprintf("%5.1f", pi)) +expect_identical(stri_sprintf("%05.1f", pi), sprintf("%05.1f", pi)) +expect_identical(stri_sprintf("%+f", pi), sprintf("%+f", pi)) 
+expect_identical(stri_sprintf("% f", pi), sprintf("% f", pi)) +expect_identical(stri_sprintf("%-10f", pi) , sprintf("%-10f", pi)) +expect_identical(stri_sprintf("%e", pi), sprintf("%e", pi)) +expect_identical(stri_sprintf("%E", pi), sprintf("%E", pi)) +expect_identical(stri_sprintf("%g", pi), sprintf("%g", pi)) +expect_identical(stri_sprintf("%g", 1e6 * pi) , sprintf("%g", 1e6 * pi)) +expect_identical(stri_sprintf("%.9g", 1e6 * pi) , sprintf("%.9g", 1e6 * pi)) +expect_identical(stri_sprintf("%G", 1e-6 * pi), sprintf("%G", 1e-6 * pi)) +expect_identical(stri_sprintf("%1.f", 101), sprintf("%1.f", 101)) +expect_identical(stri_sprintf("%1$d %1$x %1$X", 0:15), sprintf("%1$d %1$x %1$X", 0:15)) +expect_identical(stri_sprintf("min 10-char string '%10s'", c("a", "ABC", "and an even longer one")), sprintf("min 10-char string '%10s'", c("a", "ABC", "and an even longer one"))) +expect_identical(stri_sprintf("%09s", month.name), sprintf("%09s", month.name)) +expect_identical(stri_sprintf(paste0("e with %2d digits = %.", 1:18, "g"), 1:18, exp(1)), sprintf(paste0("e with %2d digits = %.", 1:18, "g"), 1:18, exp(1))) +expect_identical(stri_sprintf("second %2$1.0f, first %1$5.2f, third %3$1.0f", pi, 2, 3), sprintf("second %2$1.0f, first %1$5.2f, third %3$1.0f", pi, 2, 3)) +expect_identical(stri_sprintf("precision %.*f, width '%*.3f'", 3, pi, 8, pi), sprintf("precision %.*f, width '%*.3f'", 3, pi, 8, pi)) +expect_identical(stri_sprintf("e with %1$2d digits = %2$.*1$g", 1:18, exp(1)), sprintf("e with %1$2d digits = %2$.*1$g", 1:18, exp(1))) +expect_identical(stri_sprintf("%s %d", "test", 1:3), sprintf("%s %d", "test", 1:3)) +expect_identical(stri_sprintf("%a", seq(0, 1.0, 0.1)), sprintf("%a", seq(0, 1.0, 0.1))) +expect_identical(stri_sprintf("%a", c(0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1)), sprintf("%a", c(0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1))) diff --git a/man/operator_dollar.Rd b/man/operator_dollar.Rd index c98a31bb5..907f1f0aa 100644 --- a/man/operator_dollar.Rd +++ b/man/operator_dollar.Rd @@ 
-5,23 +5,23 @@ \alias{operator_dollar} \alias{oper_dollar} \alias{\%stri$\%} -\title{C-Style Formatting with sprintf as a Binary Operator TODO: call stri_sprintf} +\title{C-Style Formatting with \code{\link{stri_sprintf}} as a Binary Operator} \usage{ e1 \%s$\% e2 e1 \%stri$\% e2 } \arguments{ -\item{e1}{format strings, see \code{\link[base]{sprintf}} for syntax} +\item{e1}{format strings, see \code{\link{stri_sprintf}} for syntax} -\item{e2}{a list of atomic vectors to be passed to \code{\link[base]{sprintf}} +\item{e2}{a list of atomic vectors to be passed to \code{\link{stri_sprintf}} or a single atomic vector} } \value{ Returns a character vector } \description{ -Provides access to base R's \code{\link[base]{sprintf}} in form of a binary +Provides access to \code{\link{stri_sprintf}} in form of a binary operator in a way similar to Python's \code{\%} overloaded for strings. } \details{ @@ -29,9 +29,6 @@ Vectorized over \code{e1} and \code{e2}. \code{e1 \%s$\% atomic_vector} is equivalent to \code{e1 \%s$\% list(atomic_vector)}. - -Note that \code{\link[base]{sprintf}} takes field width in bytes, -not Unicode code points. See Examples for a workaround. } \examples{ "value='\%d'" \%s$\% 3 @@ -41,15 +38,22 @@ not Unicode code points. See Examples for a workaround. 
"\%s='\%d'" \%s$\% list(c("a", "b", "c"), 1) "\%s='\%d'" \%s$\% list(c("a", "b", "c"), 1:3) -# sprintf field width: x <- c("abcd", "\u00DF\u00B5\U0001F970", "abcdef") -cat(sprintf("\%s\%6s\%s", "-", x, "-"), sep="\n") -cat(sprintf("\%s\%s\%s", "-", stringi::stri_pad(x, 6), "-"), sep="\n") +cat("[\%6s]" \%s$\% x, sep="\n") # width used, not the number of bytes -} -\author{ -\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors } \seealso{ The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/} + +Other length: +\code{\link{stri_isempty}()}, +\code{\link{stri_length}()}, +\code{\link{stri_numbytes}()}, +\code{\link{stri_pad_both}()}, +\code{\link{stri_sprintf}()}, +\code{\link{stri_width}()} +} +\concept{length} +\author{ +\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors } diff --git a/man/stri_isempty.Rd b/man/stri_isempty.Rd index 306946815..3242666d1 100644 --- a/man/stri_isempty.Rd +++ b/man/stri_isempty.Rd @@ -29,8 +29,11 @@ stri_isempty(character(1)) The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/} Other length: +\code{\link{\%s$\%}()}, \code{\link{stri_length}()}, \code{\link{stri_numbytes}()}, +\code{\link{stri_pad_both}()}, +\code{\link{stri_sprintf}()}, \code{\link{stri_width}()} } \concept{length} diff --git a/man/stri_length.Rd b/man/stri_length.Rd index 10613b5dc..3c54e7add 100644 --- a/man/stri_length.Rd +++ b/man/stri_length.Rd @@ -46,8 +46,11 @@ stri_count_boundaries(stri_trans_nfkd('\u0105'), type='character') # ...and one The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/} Other length: +\code{\link{\%s$\%}()}, \code{\link{stri_isempty}()}, \code{\link{stri_numbytes}()}, +\code{\link{stri_pad_both}()}, +\code{\link{stri_sprintf}()}, \code{\link{stri_width}()} } \concept{length} diff --git a/man/stri_numbytes.Rd b/man/stri_numbytes.Rd index 1c0aeae41..959e5d40a 100644 --- a/man/stri_numbytes.Rd +++ 
b/man/stri_numbytes.Rd @@ -48,8 +48,11 @@ stri_numbytes('\U001F600') # compare stri_length('\U001F600') The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/} Other length: +\code{\link{\%s$\%}()}, \code{\link{stri_isempty}()}, \code{\link{stri_length}()}, +\code{\link{stri_pad_both}()}, +\code{\link{stri_sprintf}()}, \code{\link{stri_width}()} } \concept{length} diff --git a/man/stri_pad.Rd b/man/stri_pad.Rd index 185a27279..37182cf41 100644 --- a/man/stri_pad.Rd +++ b/man/stri_pad.Rd @@ -49,7 +49,7 @@ points be used instead of the total code point width \item{side}{[\code{stri_pad} only] single character string; sides on which padding character is added -(\code{left}, \code{right}, or \code{both})} +(\code{left} (default), \code{right}, or \code{both})} } \value{ These functions return a character vector. @@ -86,10 +86,20 @@ cat(stri_pad_both(c('the', 'string', 'processing', 'package'), cat(stri_pad_both(c('\ud6c8\ubbfc\uc815\uc74c', # takes width into account stri_trans_nfkd('\ud6c8\ubbfc\uc815\uc74c'), 'abcd'), width=10), sep='\n') -} -\author{ -\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors + } \seealso{ The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/} + +Other length: +\code{\link{\%s$\%}()}, +\code{\link{stri_isempty}()}, +\code{\link{stri_length}()}, +\code{\link{stri_numbytes}()}, +\code{\link{stri_sprintf}()}, +\code{\link{stri_width}()} +} +\concept{length} +\author{ +\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors } diff --git a/man/stri_sprintf.Rd b/man/stri_sprintf.Rd index 9e1996676..092cd027f 100644 --- a/man/stri_sprintf.Rd +++ b/man/stri_sprintf.Rd @@ -3,6 +3,7 @@ \name{stri_sprintf} \alias{stri_sprintf} \alias{stri_string_format} +\alias{stri_printf} \title{Format Strings} \usage{ stri_sprintf( @@ -22,19 +23,36 @@ stri_string_format( nan_string = "NaN", use_length = FALSE ) + +stri_printf( + format, + ..., + file = "", + sep 
= "\\n", + append = FALSE, + na_string = "NA", + inf_string = "Inf", + nan_string = "NaN", + use_length = FALSE +) } \arguments{ -\item{format}{character vector of format strings} +\item{format}{character vector of format strings (see \code{\link[base]{sprintf}})} \item{...}{vectors (coercible to integer, real, or character)} -\item{na_string}{single string to represent the not-a-number} +\item{na_string}{single string to represent missing values; +if \code{NA}, missing values in \code{...} +result in the corresponding outputs being missing too; +use \code{"NA"} for compatibility with base R} + +\item{inf_string}{single string to represent the (unsigned) infinity (\code{NA} allowed)} -\item{inf_string}{single string to represent the (unsigned) infinity} +\item{nan_string}{single string to represent the not-a-number (\code{NA} allowed)} \item{use_length}{single logical value; should the number of code points be used when applying modifiers such as \code{\%20s} -instead of the total code point width (see \code{\link{stri_width}})?} +instead of the total code point width?} \item{file}{see \code{\link[base]{cat}}} @@ -50,38 +68,54 @@ text on the standard output or other connection/file. Hence, it returns The other functions return a character vector. } \description{ -A Unicode-aware replacement for and enhancement of +\code{stri_sprintf} (synonym: \code{stri_string_format}) +is a Unicode-aware replacement for and enhancement of the built-in \code{\link[base]{sprintf}} function. Moreover, \code{stri_printf} prints formatted strings. } \details{ Vectorized over \code{format} and all vectors passed via \code{...}. -\code{stri_string_format} is a synonym for \code{stri_sprintf}. - -Note that \code{stri_printf} treats missing values in \code{...} -as \code{"NA"} strings by default. - -Note that Unicode code points may have various widths when -printed on the console and that, by default, the function takes that -into account.
By changing the state of the \code{use_length} -argument, this function act as if each code point was of width 1. - -For \code{\%d} and \code{\%f} formats, factors are treated as integer -vectors (underlying codes) and so are date and time objects, etc. +Unicode code points may have various widths when +printed on the console (compare \code{\link{stri_width}}). +These functions, by default (see the \code{use_length} argument), take this +into account. This function is not locale sensitive. For instance, numbers are -always formatted in the "POSIX" style, e.g., \code{-123456.789}. +always formatted in the "POSIX" style, e.g., \code{-123456.789} +(no thousands separator, dot as a fractional separator). Such a feature might be added at a later date, though. All arguments passed via \code{...} are evaluated. If some of them are unused, a warning is generated. Too few arguments result in an error. + +Note that \code{stri_printf} treats missing values in \code{...} +as strings \code{"NA"} by default. + +All format specifiers supported by \code{\link[base]{sprintf}} are +also available here. For the formatting of integers and floating-point +values, currently the system \code{std::snprintf()} is called, but +this may change in the future. Format specifiers are normalized +and necessary sanity checks are performed. + +Supported conversion specifiers: \code{dioxX} (integers), +\code{feEgGaA} (floats) and \code{s} (character strings). +Supported flags: \code{-} (left-align), +\code{+} (force output sign or blank when \code{NaN} or \code{NA}; numeric only), +\code{ } (space: output minus or space for a sign; numeric only), +\code{0} (pad with 0s; numeric only), +\code{#} (alternative output of some numerics). } \examples{ -#...
stri_printf("\%4s=\%.3f", c("e", "e\u00b2", "\u03c0", "\u03c0\u00b2"), c(exp(1), exp(2), pi, pi^2)) +x <- c("xxabcd", "xx\u0105\u0106\u0107\u0108", + "\u200b\u200b\u200b\u200b\U0001F3F4\U000E0067\U000E0062\U000E0073\U000E0063\U000E0074\U000E007Fabcd") +stri_printf("[\%10s]", x) # minimum width = 10 +stri_printf("[\%-10.3s]", x) # output of max width = 3, but pad to width of 10 +stri_printf("[\%10s]", x, use_length=TRUE) # minimum number of Unicode code points = 10 + # vectorization wrt all arguments: p <- runif(10) stri_sprintf(ifelse(p > 0.5, "P(Y=1)=\%1$.2f", "P(Y=0)=\%2$.2f"), p, 1-p) @@ -94,10 +128,35 @@ stri_sprintf("\%s) \%s", letters[seq_along(x)], c("\u2718", "\u2713")[x+1]) stri_printf("\%+10.3f", c(-Inf, -0, 0, Inf, NaN, NA_real_), na_string="", nan_string="\U0001F4A9", inf_string="\u221E") +stri_sprintf("UNIX time \%1$f is \%1$s.", Sys.time()) + +# the following do not work in sprintf() +stri_sprintf("\%1$#- *2$.*3$f", 1.23456, 10, 3) # two asterisks +stri_sprintf(c("\%s", "\%f"), pi) # re-coercion needed +stri_sprintf("\%1$s is \%1$f UNIX time.", Sys.time()) # re-coercion needed +stri_sprintf(c("\%d", "\%s"), factor(11:12)) # re-coercion needed +stri_sprintf(c("\%s", "\%d"), factor(11:12)) # re-coercion needed + } -\author{ -\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors +\references{ +\code{printf} in \code{glibc}, +\url{https://man.archlinux.org/man/printf.3} + +\code{printf} format strings -- Wikipedia, +\url{https://en.wikipedia.org/wiki/Printf_format_string} } \seealso{ The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/} + +Other length: +\code{\link{\%s$\%}()}, +\code{\link{stri_isempty}()}, +\code{\link{stri_length}()}, +\code{\link{stri_numbytes}()}, +\code{\link{stri_pad_both}()}, +\code{\link{stri_width}()} +} +\concept{length} +\author{ +\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors } diff --git a/man/stri_width.Rd b/man/stri_width.Rd index
aea899528..6e1a40d08 100644 --- a/man/stri_width.Rd +++ b/man/stri_width.Rd @@ -62,9 +62,12 @@ stri_width(stri_trans_nfkd('\ubc1f')) # includes Hangul Jamo medial vowels and f The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/} Other length: +\code{\link{\%s$\%}()}, \code{\link{stri_isempty}()}, \code{\link{stri_length}()}, -\code{\link{stri_numbytes}()} +\code{\link{stri_numbytes}()}, +\code{\link{stri_pad_both}()}, +\code{\link{stri_sprintf}()} } \concept{length} \author{ diff --git a/src/stri_sprintf.cpp b/src/stri_sprintf.cpp index 3f143ccb7..9cb197c5d 100644 --- a/src/stri_sprintf.cpp +++ b/src/stri_sprintf.cpp @@ -178,9 +178,10 @@ int stri__find_type_spec(const char* f, R_len_t j0, R_len_t n) R_len_t j1 = j0; STRI_ASSERT(f[j0-1] == '%'); while (true) { - if (j1 >= n) + if (j1 >= n) { + // TODO: note that this will display UTF-8 also on non-UTF-8 outputs throw StriException(MSG__INVALID_FORMAT_SPECIFIER, f+j0); // dangling %... - else if (strchr(STRI_SPRINTF_SPEC_TYPE, f[j1]) != nullptr) + } else if (strchr(STRI_SPRINTF_SPEC_TYPE, f[j1]) != nullptr) break; else if (strchr(STRI_SPRINTF_FLAGS, f[j1]) != nullptr) ; @@ -188,10 +189,12 @@ int stri__find_type_spec(const char* f, R_len_t j0, R_len_t n) ; else if (f[j1] >= '0' && f[j1] <= '9') ; - else + else { + // TODO: note that this will display UTF-8 also on non-UTF-8 outputs throw StriException( MSG__INVALID_FORMAT_SPECIFIER "; " MSG__EXPECTED_CHAR_IN_SET, (f+j0), STRI_SPRINTF_ACCEPTED_CHARS); + } j1++; }