Skip to content

Commit

Permalink
Merge pull request #66 from ISAAKiel/pop.sim
Browse files Browse the repository at this point in the history
Pop.sim merged with master branch.
  • Loading branch information
nmueller18 authored Jan 11, 2025
2 parents 00a4fe7 + 6069add commit e96f7c8
Show file tree
Hide file tree
Showing 19 changed files with 765 additions and 172 deletions.
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: mortAAR
Type: Package
Title: Analysis of Archaeological Mortality Data
Version: 1.1.6
Version: 1.1.7
Authors@R: c(
person("Nils", "Mueller-Scheessel", email = "[email protected]", role = c("aut", "cre", "cph")),
person("Martin", "Hinz", email = "[email protected]", role = c("aut")),
Expand All @@ -19,7 +19,7 @@ Description: A collection of functions for the analysis of archaeological mortal
<https://books.google.de/books?id=nG5FoO_becAC&lpg=PA27&ots=LG0b_xrx6O&dq=life%20table%20archaeology&pg=PA27#v=onepage&q&f=false>).
It takes demographic data in different formats and displays the result in a standard life table
as well as plots the relevant indices (percentage of deaths, survivorship, probability of death, life expectancy, percentage of population).
Date: 2024-03-06
Date: 2024-11-28
License: GPL-3 | file LICENSE
Encoding: UTF-8
LazyData: true
Expand All @@ -29,7 +29,9 @@ Imports:
reshape2 (>= 1.4.2),
methods (>= 3.3.3),
tibble (>= 3.0.3),
rlang (>= 1.1.1)
rlang (>= 1.1.1),
flexsurv (>= 2.2.2),
stats (>= 4.3.1)
RoxygenNote: 7.2.3
Suggests:
testthat,
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ S3method(print,mortaar_life_table)
S3method(print,mortaar_life_table_list)
export(as.mortaar_life_table)
export(as.mortaar_life_table_list)
export(halley.band)
export(is.mortaar_life_table)
export(is.mortaar_life_table_list)
export(life.table)
Expand All @@ -39,7 +40,10 @@ export(lt.population_size)
export(lt.representativity)
export(lt.reproduction)
export(lt.sexrelation)
export(pop.sim.gomp)
export(prep.life.table)
export(random.cat)
export(random.cat.apply)
importFrom(Rdpack,reprompt)
importFrom(graphics,axis)
importFrom(graphics,grid)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# mortAAR 1.1.7
- added function for Halley bands
- added function for population simulation
- added helper functions for the random generating and applying of age categories
- added reprasentativity check via Total fertility rate

# mortAAR 1.1.6
- fixed the error in plotting that occurred after the fix of "aes_string" of ggplot2
and that still persisted
Expand Down
143 changes: 143 additions & 0 deletions R/halley_band.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#' Halley band of the mortality profile of a skeletal population
#'
#' In a series of papers, M. A. Luy and U. Wittwer-Backofen \emph{(2005; 2008)}
#' proposed a method they called 'Halley band' as alternative for other
#' methods of sampling from an skeletal population. It basically involves
#' sampling n times from the age-estimation of each individual and then
#' only taking the 2.5th and 97.5th percentile into account. The space
#' between, they dubbed 'Halley band' but pointed out that it
#' is not to be confused with confidence intervals.
#'
#' @param x a data.frame with individuals and age estimations.
#'
#' @param n number of runs, default: 1000.
#'
#' @param uncert level of uncertainty, default: 0.95.
#'
#' @param agebeg numeric. Starting age of the respective individual.
#'
#' @param ageend numeric. Closing age of the respective individual.
#'
#' @param agerange character. Determination if the closing
#' age leaves a gap to the following age category. If yes (= "excluded"),
#' "1" is added to avoid gaps, default: "excluded".
#'
#' @return
#' One data.frame with the following items:
#'
#' \itemize{
#' \item \bold{age}: age in years.
#' \item \bold{lower_dx}: Lower boundary of uncertainty for dx.
#' \item \bold{upper_dx}: Upper boundary of uncertainty for dx.
#' \item \bold{lower_qx}: Lower boundary of uncertainty for qx.
#' \item \bold{upper_qx}: Upper boundary of uncertainty for qx.
#' \item \bold{lower_lx}: Lower boundary of uncertainty for lx.
#' \item \bold{upper_lx}: Upper boundary of uncertainty for lx.
#' }
#'
#' @references
#'
#' \insertRef{Luy_Wittwer-Backofen_2005}{mortAAR}
#'
#' \insertRef{Luy_Wittwer-Backofen_2008}{mortAAR}
#'
#' @examples
#'
#'# create simulated population with artifical coarsening first
#' pop_sim <- pop.sim.gomp(n = 1000)
#' sim_ranges <- random.cat()
#'
#' # apply random age categories to simulated ages
#' sim_appl <- random.cat.apply(pop_sim$result, age = "age",
#' age_ranges = sim_ranges, from = "from", to = "to")
#'
#' # create halley bands
#' demo <- halley.band(sim_appl, n = 1000, uncert = 0.95, agebeg = "from",
#' ageend = "to", agerange = "excluded")
#'
#' # plot band with ggplot
#' library(ggplot2)
#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_dx, ymax = upper_dx),
#' linetype = 0, fill = "grey")
#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_lx, ymax = upper_lx),
#' linetype = 0, fill = "grey")
#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_qx, ymax = upper_qx),
#' linetype = 0, fill = "grey")

#' @rdname halley.band
#' @export
halley.band <- function(x, n = 1000, uncert = 0.95, agebeg, ageend,
agerange = "excluded") {
asd <- data.frame(x)

# Change the names of agebeg and ageend for further processes to "beg" and "ende".
names(asd)[which(names(asd)==agebeg)] <- "beg"
names(asd)[which(names(asd)==ageend)] <- "ende"

# Defines if the max of the age ranges is inclusive or exclusive.
if(agerange == "excluded"){
asd$ende = asd$ende + 1
}

low_q <- ( 1 - uncert ) / 2
up_q <- 1 - ( 1 - uncert ) / 2

demo_sim_list <- list()
for (i in 1:n) {
# Create the demo_sim_sim data frame
demo_sim_sim <- data.frame(ind = 1:nrow(asd))
demo_sim_sim$age <- round(stats::runif(nrow(asd), min = asd$beg, max = asd$ende))

# Create necdf
age_counts <- table(demo_sim_sim$age)
necdf <- data.frame(
a = 1,
age = as.numeric(names(age_counts)),
Dx = as.numeric(age_counts)
)

# Add computed columns to necdf
necdf$dx <- necdf$Dx / sum(necdf$Dx) * 100
necdf$lx <- c(100, 100 - cumsum(necdf$dx))[1:nrow(necdf)]
necdf$qx <- necdf$dx / necdf$lx * 100
necdf$Ax <- necdf$a / 2
necdf$Lx <- necdf$a * necdf$lx - ((necdf$a - necdf$Ax) * necdf$dx)

# Store necdf in the list
demo_sim_list[[i]] <- necdf
}

# Combine all data frames in the list into one
demo_sim_all <- do.call(rbind, demo_sim_list)

# Prepare for summarization
output <- data.frame()

# Loop through unique ages to calculate quantiles
unique_ages <- sort(unique(demo_sim_all$age))
for (age in unique_ages) {
# Subset data for the current age
age_data <- demo_sim_all[demo_sim_all$age == age, ]

# Calculate quantiles
lower_dx <- stats::quantile(age_data$dx, probs = low_q)
upper_dx <- stats::quantile(age_data$dx, probs = up_q)
lower_qx <- stats::quantile(age_data$qx, probs = low_q)
upper_qx <- stats::quantile(age_data$qx, probs = up_q)
lower_lx <- stats::quantile(age_data$lx, probs = low_q)
upper_lx <- stats::quantile(age_data$lx, probs = up_q)

# Append the results to the output
output <- rbind(output, data.frame(
age = age,
lower_dx = lower_dx,
upper_dx = upper_dx,
lower_qx = lower_qx,
upper_qx = upper_qx,
lower_lx = lower_lx,
upper_lx = upper_lx
))
}

return(output)
}
29 changes: 26 additions & 3 deletions R/lifetable_indices.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#' \item \bold{D0_14_D}: proportion of individuals aged 0--14
#' according to \emph{McFadden & Oxenham 2018a} if infants are represented
#' well.
#' \item \bold{D15_49_D15plus}: proportion of individuals aged 15--49
#' according to \emph{Taylor & Oxenham 2024}.
#' \item \bold{e0}: life expectancy at age 0.
#'}
#'
Expand All @@ -34,6 +36,8 @@
#'
#' \insertRef{mcfadden_oxenham_2018a}{mortAAR}
#'
#' \insertRef{taylor_oxenham_2024}{mortAAR}
#'
#' @examples
#' schleswig <- life.table(schleswig_ma[c("a", "Dx")])
#' lt.indices(schleswig)
Expand Down Expand Up @@ -63,6 +67,8 @@ lt.indices.mortaar_life_table_list <- function(life_table) {
#' @noRd
lt.indices.mortaar_life_table <- function(life_table) {

# please note that "all_age" denotes the upper limit of the age categories,
# so the queries for the indices are counterintuitive.
all_age <- life_table$a %>% cumsum

# Children index according to Masset and Bocquet 1977
Expand All @@ -75,7 +81,7 @@ lt.indices.mortaar_life_table <- function(life_table) {
d20plus <- life_table$Dx[all_age > 20] %>% sum
d5_14_d20plus <- d5_14 / d20plus

# Senility index according to Masset and Bocquet 1977
# Senility index according to Masset and Bocquet 1977
d60plus <- life_table$Dx[all_age > 60] %>% sum
d60_d20plus <- d60plus / d20plus

Expand All @@ -94,6 +100,11 @@ lt.indices.mortaar_life_table <- function(life_table) {
d0plus <- life_table$Dx %>% sum
D0_14_D <- d0_14 / d0plus

# D15_49_D15plus index according to Taylor and Oxenham 2024
d15_49 <- life_table$Dx[all_age >= 20 & all_age <= 50] %>% sum
d15plus <- life_table$Dx[all_age >= 20] %>% sum
D15_49_D15plus <- d15_49 / d15plus

# Life expectancy at age 0
e0 <- life_table$ex[[1]]

Expand All @@ -103,7 +114,8 @@ lt.indices.mortaar_life_table <- function(life_table) {
juvenile_i = d5_14_d20plus, d5_14 = d5_14, d20plus = d20plus,
senility_i = d60_d20plus, d0plus= d0plus, d60plus = d60plus,
p5_19 = p5_19,D30_D5 = D30_D5,
D0_14_D = D0_14_D, d0_14 = d0_14,
D0_14_D = D0_14_D, d0_14 = d0_14,
D15_49_D15plus = D15_49_D15plus,
e0 = e0
)

Expand Down Expand Up @@ -131,6 +143,8 @@ lt.indices.mortaar_life_table <- function(life_table) {
#' @keywords internal
lt.mortality <- function(life_table) {

indx <- lt.indices(life_table)

all_age <- life_table$a %>% cumsum

# Indices for representativity after Weiss 1973 and Model life tables
Expand All @@ -149,6 +163,15 @@ lt.mortality <- function(life_table) {
lx10 <- life_table$lx[all_age == 15]
q15_45 <- d15_45 / lx10 * 100

result_list <- list(q0_5 = q0_5, q10_5 = q10_5, q15_5 = q15_5, q15_45 = q15_45)
# Total fertility rate from subadults and adults
TFR_subadult <- indx$D0_14_D * 7.210 + 2.381
TFR_adult <- indx$D15_49_D15plus * 8.569 + 2.578

result_list <- list(q0_5 = q0_5,
q10_5 = q10_5,
q15_5 = q15_5,
q15_45 = q15_45,
TFR_subadult = TFR_subadult,
TFR_adult = TFR_adult)
result_list
}
92 changes: 92 additions & 0 deletions R/population_simulation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#' Simulation of a population of adults with Gompertz distribution
#'
#' In many instances, it is useful to calculate with a population with
#' known parameters. To generate a population with realistic
#' characteristics is less obvious than it seems. We operate here
#' with the Gompertz distribution which provides a reasonable
#' approximation of human mortality for \bold{adult} mortality, that is
#' for the ages >= 15 years. The user has to specify
#' either the parameter b or the modal age M. The modal age M is
#' particular useful as it provides an intuitive understanding of
#' the resulting age distribution. In both instances, the second
#' parameter a is generated by the regression formula found by
#' \emph{Sasaki and Kondo 2016}. If neither is given, a population
#' with random parameters realistic for pre-modern times is generated.
#'
#' @param n number of individuals to be simulated.
#'
#' @param b numeric, optional. Gompertz parameter controlling the
#' level of mortality.
#'
#' @param M numeric, optional. Modal age M.
#'
#' @param start_age numeric. Start age, default: 15 years.
#'
#' @param max_age numeric. Maximal age, to avoid unlikely centenaries,
#' default: 100 years.
#'
#' @return
#' A list of two data.frames with the following items:
#'
#' First data.frame
#' \itemize{
#' \item \bold{N}: Number of individuals.
#' \item \bold{b}: Gompertz parameter controlling mortality.
#' \item \bold{M}: Modal age.
#' \item \bold{a}: Gompertz parameter controlling hazard of the
#' youngest age group.
#' }
#'
#'Second data.frame
#' \itemize{
#' \item \bold{ind}: ID of individuals.
#' \item \bold{age}: Simulated absolute age.
#' }
#'
#' @references
#'
#' \insertRef{sasaki_kondo_2016}{mortAAR}
#'
#' @examples
#'
#' pop_sim <- pop.sim.gomp(n = 10000, M = 35)
#' pop_sim <- pop.sim.gomp(n = 10000, b = 0.03)
#' pop_sim <- pop.sim.gomp(n = 10000)

#' @rdname pop.sim.gomp
#' @export
pop.sim.gomp <- function(n, b = NULL, M = NULL, start_age = 15, max_age = 100) {
if ( length(M) > 0) {
M_1 <- 0
M_2 <- 0
while ( M < M_1 | M > M_2 ) {
b <- stats::runif(n = 1, min = 0.02, max = 0.1)
a <- exp(stats::rnorm(1, (-66.77 * (b - 0.0718) - 7.119), 0.0823))
M_ <- 1 / b * log (b/a) + start_age
M_1 <- M_ - 0.001
M_2 <- M_ + 0.001
}
} else if (length(b) > 0) {
a <- exp(stats::rnorm(1, (-66.77 * (b - 0.0718) - 7.119), sqrt(0.0823) ) )
M <- 1 / b * log (b/a) + start_age
} else {
b <- stats::runif(n = 1, min = 0.02, max = 0.05)
a <- exp(stats::rnorm(1, (-66.77 * (b - 0.0718) - 7.119), sqrt(0.0823) ) )
M <- 1 / b * log (b/a) + start_age
}
count = 1
lt_result <- matrix(NA, n, 2)
while (count < (n + 1)) {
age <- round(flexsurv::rgompertz(1, b, a) ) + start_age
if (age < max_age) {
lt_result[count,] <- c(ind = count, age = age)
count <- count + 1
}
}
lt_result <- data.frame(lt_result)
colnames(lt_result) <- c("ind", "age")
lt_values <- data.frame(n = n, b = b, a = a, M = M, start_age = start_age,
max_age = max_age)
lt_list <- list(values = lt_values, result = lt_result)
return(lt_list)
}
Loading

0 comments on commit e96f7c8

Please sign in to comment.