Skip to content

Commit

Permalink
fixed arg handling for lifetables; handling other package issues, not…
Browse files Browse the repository at this point in the history
… done yet
  • Loading branch information
timriffe committed Dec 29, 2023
1 parent 9b62861 commit ab2b2ad
Show file tree
Hide file tree
Showing 11 changed files with 285 additions and 10 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: DemoTools
Type: Package
Title: Standardize, Evaluate, and Adjust Demographic Data
Version: 01.13.79
Date: 2023-12-20
Version: 01.13.80
Date: 2023-12-29
Authors@R: c(
person("Tim", "Riffe", role = c("aut", "cre"),
email = "[email protected]", comment = c(ORCID = "0000-0002-2673-4622")),
Expand All @@ -22,7 +22,7 @@ License: file LICENSE
LazyLoad: yes
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.0
RoxygenNote: 7.2.3
Depends:
R (>= 3.6),
Rcpp (>= 0.12.0),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ importFrom(data.table,rbindlist)
importFrom(data.table,setDT)
importFrom(data.table,uniqueN)
importFrom(demogR,cdmltw)
importFrom(dplyr,case_when)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
importFrom(dplyr,rename)
Expand Down
22 changes: 22 additions & 0 deletions R/lt_abridged.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#' @param extrapLaw character. If extrapolating, which parametric mortality law should be invoked? Options include
#' \code{"Kannisto", "Kannisto_Makeham", "Makeham", "Gompertz", "GGompertz", "Beard", "Beard_Makeham", "Quadratic"}. Default \code{"Kannisto"} if the highest age is at least 90, otherwise `"makeham"`. See details.
#' @inheritParams lt_a_closeout
#' @importFrom dplyr case_when
#' @export
#' @return Lifetable in data.frame with columns
#' \itemize{
Expand Down Expand Up @@ -183,9 +184,30 @@ lt_abridged <- function(Deaths = NULL,
extrapFrom = max(Age),
extrapFit = NULL,
...) {

# some handy name coercion
a0rule <- case_when(a0rule == "Andreev-Kingkade" ~ "ak",
a0rule == "Coale-Demeny" ~ "cd",
TRUE ~ a0rule)
axmethod <- case_when(axmethod == "UN (Greville)" ~ "un",
axmethod == "PASEX" ~ "pas",
TRUE ~ axmethod)
Sex <- substr(Sex, 1, 1) |>
tolower()
Sex <- ifelse(Sex == "t", "b", Sex)

region <- substr(region, 1, 1) |>
tolower()
if (!is.null(extrapLaw)){
extrapLaw <- tolower(extrapLaw)
}


# now we check args
axmethod <- match.arg(axmethod, choices = c("pas","un"))
Sex <- match.arg(Sex, choices = c("m","f","b"))
a0rule <- match.arg(a0rule, choices = c("ak","cd"))

if (!is.null(extrapLaw)){
extrapLaw <- tolower(extrapLaw)
extrapLaw <- match.arg(extrapLaw, choices = c("kannisto",
Expand Down
18 changes: 18 additions & 0 deletions R/lt_regroup_age.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ lt_single2abridged <- function(lx,
#'
#' @export
#' @importFrom ungroup pclm
#' @importFrom dplyr case_when
#' @examples
#' Mx <- c(.23669,.04672,.00982,.00511,.00697,.01036,.01169,
#' .01332,.01528,.01757,.02092,.02517,.03225,.04241,.06056,
Expand Down Expand Up @@ -151,6 +152,23 @@ lt_abridged2single <- function(
NN <- length(Age)
#stopifnot(length(nMx) == NN)

# some handy name coercion
a0rule <- case_when(a0rule == "Andreev-Kingkade" ~ "ak",
a0rule == "Coale-Demeny" ~ "cd",
TRUE ~ a0rule)
axmethod <- case_when(axmethod == "UN (Greville)" ~ "un",
axmethod == "PASEX" ~ "pas",
TRUE ~ axmethod)
Sex <- substr(Sex, 1, 1) |>
tolower()
Sex <- ifelse(Sex == "t", "b", Sex)

region <- substr(region, 1, 1) |>
tolower()
if (!is.null(extrapLaw)){
extrapLaw <- tolower(extrapLaw)
}

if (!is.null(extrapLaw)){
extrapLaw <- tolower(extrapLaw)
extrapLaw <- match.arg(extrapLaw, choices = c("kannisto",
Expand Down
16 changes: 16 additions & 0 deletions R/lt_single.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#' \item{ex}{numeric. Age-specific remaining life expectancy.}
#' }
#' @export
#' @importFrom dplyr case_when
lt_single_mx <- function(nMx,
Age = 1:length(nMx) - 1,
radix = 1e5,
Expand All @@ -36,6 +37,21 @@ lt_single_mx <- function(nMx,
...) {

stopifnot(extrapFrom <= max(Age))

# some handy name coercion
a0rule <- case_when(a0rule == "Andreev-Kingkade" ~ "ak",
a0rule == "Coale-Demeny" ~ "cd",
TRUE ~ a0rule)
Sex <- substr(Sex, 1, 1) |>
tolower()
Sex <- ifelse(Sex == "t", "b", Sex)

region <- substr(region, 1, 1) |>
tolower()
if (!is.null(extrapLaw)){
extrapLaw <- tolower(extrapLaw)
}

Sex <- match.arg(Sex, choices = c("m","f","b"))
a0rule <- match.arg(a0rule, choices = c("ak","cd"))
if (!is.null(extrapLaw)){
Expand Down
4 changes: 3 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,9 @@ getModelLifeTable <- function(ModelName, Sex) {

avg_adj <- function(x) {
n <- length(x)
(shift.vector(x,-1, NA) + shift.vector(x, 1, NA)) / 2
out <- (shift.vector(x,-1, NA) + shift.vector(x, 1, NA)) / 2
names(out) <- names(x)
out
}

#' convert strings to concatenation of lower case alphabet
Expand Down
4 changes: 2 additions & 2 deletions R/utilsAge.R
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ rescaleAgeGroups <- function(Value1,
#' Age <- c(0,1,3,seq(5,100,5))
#' AgeInt <- c(1,2,2,rep(5,19),1)
#' Value <- tapply(V1,rep(Age,times=AgeInt), sum)
#'
#'
#' is_abridged(Age)
#' age_abridge_force(Value, Age)
age_abridge_force <- function(Value, Age) {
Expand All @@ -744,7 +744,7 @@ age_abridge_force <- function(Value, Age) {
Value,
Age = Age)
#a1 <- min(Age):(length(v1) - 1)
a1 <- 1:length(v1) - 1 + min(Age)
a1 <- (1:length(v1) - 1 + min(Age)) |> as.integer()
AgeAbr <- calcAgeAbr(a1)
vabr <- tapply(v1, AgeAbr, sum)
vabr
Expand Down
217 changes: 217 additions & 0 deletions dev/ediev.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
smooth_age_zigzag_dd <- function(
Value, #population counts by single year of age
Age, #age index for Value: should be a continuous range of integer values (we will not check that)
OAG = TRUE, #is the last age group an open interval?
ret_ggPlot = TRUE, #if TRUE, we return a list(data=ValueOut, ggplot=myPlot) with a plot for checking results
#if FALSE, we return the smoothed series ValueOut
legendPlot = NULL #legend for the ggPlot
)
{
#Extension of Feeney (2013) "Removing Zig-Zag from Age Data" for single age data
#in the spirit of Dalkhat M. Ediev (2021) "A model of age heaping with applications to population graduation that retains informative demographic variation"
#Main differences with Feeney's algorithm is that, apart for working with single age data
#to suppose that age heaping can start 2 years before (and symmetrically 2 years after)
#and not only 1 year before and after.
#For example for age 40, we suppose that heaping occurs symmetrically, in the same proportion, at
#ages 39 and 41, as Feeney supposed for five-years age groups with adjacent age groups,
#but in the same spirit we suppose that
#heaping can occur also at ages 38 and 42 towards attracting age 40.
#We also suppose that heaping can only occurs for ages multiple of 5 and 10
if (OAG) {
#if the last age is an open interval, we drop it
AgeRangeOut <- Age[1:(length(Age)-1)]
}
#determine the ages with the variable parameters
#For example we start with ages 3 and 4 for heaping at age 5 and ages 6 and 7 use symmetrical values
#generally speaking heaping can start at ages x-2, x-1 and then symmetrically at ages x+1, x+2 towards attracting age x
#we suppose that the first and the last age we can treat have at least values for 2 ages before and after
#that means that if the first age in Age is 9 or 10, we will skip the first
getAgeRange <- function(AgeRangeOut) {
ageRangeMin <- max(5, trunc((AgeRangeOut[1] + 2) / 5) * 5)
if (ageRangeMin < AgeRangeOut[1] + 2) ageRangeMin <- ageRangeMin + 5
ageRangeMax <- trunc((AgeRangeOut[length(AgeRangeOut)] - 2) / 5) * 5
if (ageRangeMax > AgeRangeOut[length(AgeRangeOut)] - 2) ageRangeMax <- ageRangeMax - 5
return (c(ageRangeMin, ageRangeMax))
}
ageRange <- getAgeRange (AgeRangeOut)
ageRangeMin <- ageRange[1]
ageRangeMax <- ageRange[2]
#number of ages multiples of 5
nAges_mult <- trunc ((ageRangeMax - ageRangeMin) / 5) + 1
if (nAges_mult < 2) stop ("Age range should include at least two consecutive ages multiple of 5")
#initial values for the parameters
#if first attracting age is 5, age 3 has initial proportion of 0.1, age 4 of 0.2
#these two parameters will get 'optimized' by R optim
#the same parameters are applied, symmetrically, for age 6 (0.2) and age 7 (0.1)
param <- rep (c(0.1, 0.2), nAges_mult)
param_min <- rep(0, nAges_mult * 2)
param_max <- rep(9, nAges_mult * 2)

computeNewPop <- function(data, par, Age, ageRangeMin, nAges_mult, ret_Props=FALSE) {
if (ret_Props) props <- rep(NA, length(data))
for (ageIter in (1:nAges_mult)) {
currFirstAge <- ageRangeMin + (ageIter - 1) * 5 - 2
indexInData <- currFirstAge - Age[1] + 1
if (ret_Props) {
props[indexInData] <- par[(ageIter - 1) * 2 + 1]
props[indexInData+1] <- par[(ageIter - 1) * 2 + 2]
props[indexInData+3] <- par[(ageIter - 1) * 2 + 2]
props[indexInData+4] <- par[(ageIter - 1) * 2 + 1]
} else {
data[indexInData + 2] <- data[indexInData + 2] -
(data[indexInData] + data[indexInData + 4]) * par[(ageIter - 1) * 2 + 1] -
(data[indexInData + 1] + data[indexInData + 3]) * par[(ageIter - 1) * 2 + 2]
data[indexInData] <- data[indexInData] * (1 + par[(ageIter - 1) * 2 + 1])
data[indexInData+1] <- data[indexInData+1] * (1 + par[(ageIter - 1) * 2 + 2])
data[indexInData+3] <- data[indexInData+3] * (1 + par[(ageIter - 1) * 2 + 2])
data[indexInData+4] <- data[indexInData+4] * (1 + par[(ageIter - 1) * 2 + 1])
}
}

if (ret_Props) {
return (props)
} else {
return (data)
}
}

optim_fun <- function(data, par) {
popIter <- computeNewPop(data, par, Age, ageRangeMin, nAges_mult)
totPop <- sum (popIter)
objective_to_min <- 0
for (ageIter in (1:nAges_mult)) {
currFirstAge <- ageRangeMin + (ageIter - 1) * 5 - 2
indexInData <- currFirstAge - Age[1] + 1
term1 <- ((popIter[indexInData + 2] - (popIter[indexInData + 1] + popIter[indexInData + 3]) / 2)) / totPop
term2 <- ((popIter[indexInData + 2] - (popIter[indexInData] + popIter[indexInData + 4]) / 2)) / totPop
objective_to_min <- objective_to_min + term1^2 + term2^2
}
return (objective_to_min)
}

oldOptimValue <- 10^1000

for (rep in (1:10)) {
#apply optimization up to 10 times, using parameters obtained in the previous step, just in case...
parResult <- optim(data=Value, par=param, lower=param_min, upper=param_max, fn=optim_fun, method = "L-BFGS-B")
if (parResult$value >= oldOptimValue) break
param <- parResult$par
oldOptimValue <- parResult$value
}
valueOut <- computeNewPop(Value, parResult$par, Age, ageRangeMin, nAges_mult)

if (ret_ggPlot) {
library2 <- function(pack) {
#pack<-"tcltk2"
if( !(pack %in% installed.packages()))
{install.packages(pack)}
library(pack,character.only = TRUE)
}
library2("ggplot2")
library2("scales")

getScales <- function(Value, ageRangeMin, ageRangeMax) {
maxCount <- max(Value) * 1.0
power <- 0
while (maxCount > 1) {
maxCount <- maxCount / 10.0
power <- power + 1
}
maxCount <- ceiling(maxCount * 10) * 10^(power-1)
leftScale <- seq(0, maxCount, maxCount / 4)
rightScale <- seq(0, 1, 0.25)
xScale <- seq(ageRangeMin-5, ageRangeMax+5, 5)
return (list(left=leftScale, right=rightScale, x=xScale))
}

df <- data.frame(Age=Age, val=Value)
df$type <- 'Original'
temp <- data.frame(Age=Age, val=valueOut)
temp$type <- 'Corrected'
df <- rbind(df, temp)
props <- computeNewPop(Value, parResult$par, Age, ageRangeMin, nAges_mult, ret_Prop = TRUE)
temp <- data.frame(Age=Age, val=props)
temp$type <- 'Attraction'
#transform into proportions
temp$val <- temp$val / (1 + temp$val)
scale_axes <- getScales (Value, ageRangeMin, ageRangeMax)
maxY_left <- scale_axes$left[length(scale_axes$left)]
#adjust values in preparation for second axis (we will correct the labels later on in scale_y_continuous)
temp$val <- temp$val * maxY_left
df <- rbind(df, temp)
df$type <- factor (df$type, levels=c("Original", "Corrected", "Attraction"))
myLabelNames <- c("Original", "Corrected", "Attraction factors")

myPlot <- ggplot(df, aes(x=Age, y=val, color=type, alpha=type, size=type)) + geom_line()
myPlot <- myPlot + scale_y_continuous(breaks = scale_axes$left, name="population counts",
sec.axis = sec_axis(as.formula(paste("~./", maxY_left, sep="")), name="attraction proportions",
breaks=scale_axes$right),
labels = scales::label_number())
myPlot <- myPlot + scale_x_continuous(breaks = scale_axes$x)
myPlot <- myPlot + scale_colour_manual(values=c("red", "blue", "grey40"), labels=myLabelNames)
myPlot <- myPlot + scale_size_manual(values=c(1, 2, 2), labels=myLabelNames)
myPlot <- myPlot + scale_alpha_manual(values=c(1, 1, 0.2), labels=myLabelNames)
myPlot <- myPlot + theme_bw() + theme(legend.key = element_blank(), legend.title=element_blank(), legend.position="bottom")
myPlot <- myPlot + theme_text(1)
myPlot <- myPlot + labs(color="", alpha="", size="")
if (!is.null(legendPlot)) myPlot <- myPlot + ggtitle(legendPlot)

return (list(data=valueOut, ggplot=myPlot))
} else {
return (valueOut)
}
}

#data for Colombia 1973
col1973 <- structure(list(
Age = 0:99,
Value = c(493538L, 497954L, 580000L,
610204L, 609266L, 609670L, 599184L, 628352L, 624302L, 549158L,
624112L, 527810L, 611266L, 541084L, 490342L, 466650L, 435310L,
423912L, 444468L, 334732L, 345390L, 301686L, 319468L, 312100L,
267828L, 293902L, 230690L, 230770L, 244100L, 178590L, 278284L,
136552L, 201120L, 218514L, 159054L, 217800L, 156786L, 162270L,
203560L, 141258L, 247758L, 94344L, 166104L, 164336L, 116024L,
182858L, 104268L, 102916L, 136222L, 90550L, 180598L, 68132L,
109788L, 108058L, 81438L, 116446L, 77230L, 62844L, 73906L, 49812L,
136486L, 37506L, 61658L, 69702L, 43570L, 72760L, 36930L, 34102L,
40052L, 24126L, 68924L, 16004L, 32988L, 40868L, 18406L, 31836L,
14222L, 11824L, 16206L, 6972L, 24294L, 5016L, 7392L, 6906L, 6244L,
9424L, 3806L, 3286L, 3478L, 1912L, 5260L, 916L, 1540L, 1142L,
1266L, 1674L, 602L, 468L, 752L, 2360L)),
class = "data.frame",
row.names = c(NA,-100L))

#Alexandropol 1897
Alex1897 <- structure(list(
Age = 0:99,
Value = c(376.7579251, 342.1757925,
398.3717579, 337.8530259, 314.0778098, 290.3025937, 355.1440922,
334, 345, 360, 392, 260.0432277, 370, 307.5936599, 290.3025937,
247.074928, 281.6570605, 268.6887608, 298.9481268, 164.9423631,
333.5302594, 1401.253602, 1729.783862, 1643.32853, 1669.26513,
700.9654179, 324.8847262, 273.0115274, 234.1066282, 104.4236311,
471.8587896, 108.7463977, 186.556196, 151.9740634, 113.0691643,
389.7262248, 151.9740634, 126.037464, 156.29683, 48.22766571,
458.8904899, 48.22766571, 108.7463977, 56.87319885, 65.51873199,
277.3342939, 82.80979827, 52.55043228, 91.45533141, 48.22766571,
363.7896254, 26.61383285, 52.55043228, 39.58213256, 39.58213256,
199.5244957, 56.87319885, 39.58213256, 48.22766571, 5, 285.9798271,
17.96829971, 26.61383285, 17.96829971, 22.29106628, 78.4870317,
22.29106628, 22.29106628, 26.61383285, 9.322766571, 121.7146974,
0.677233429, 9.322766571, 5, 5, 30.93659942, 13.64553314, 9.322766571,
9.322766571, 13.64553314, 35.25936599, 9.322766571, 9.322766571,
5, 0.677233429, 17.96829971, 0.677233429, 5, 0.677233429, 9.322766571,
17.96829971, 5, 5, 5, 5, 0.677233429, 0.677233429, 0.677233429,
0.677233429, 0.677233429)),
class = "data.frame",
row.names = c(NA, -100L))

#call the function for Colombia
resCol <- smooth_age_zigzag_dd(col1973$Value, col1973$Age, OAG = TRUE, ret_ggPlot = TRUE, legendPlot="Colombia 1973")
#show the plot for Colombia
resCol$ggplot
#call the function for Alexandropol
resAlex <- smooth_age_zigzag_dd(Alex1897$Value, Alex1897$Age, OAG = TRUE, ret_ggPlot = TRUE, legendPlot="Alexandropol 1897")
#show the plot
resAlex$ggplot
3 changes: 1 addition & 2 deletions man/graduate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vignettes/Age-heaping_quality_with_Demotools.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,4 @@ r5

Values greater than 0.1 coupled with a sawtooth value greater than 0 already suggest that some smoothing is warranted. If there is no detected sawtooth pattern, then five-year-roughness indices should probably need to be higher and visually confirmed before deciding to smooth. Further guidelines can be found in the vignette on smoothing.

##References
# References
2 changes: 1 addition & 1 deletion vignettes/lifetables_with_demotools.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ In this output, the `Age` column indicates the lower bound of each age group. Th



## References
# References

0 comments on commit ab2b2ad

Please sign in to comment.