Merge pull request #66 from ISAAKiel/pop.sim

Pop.sim merged with master branch.
ISAAKiel · Jan 11, 2025 · e96f7c8 · e96f7c8
2 parents 00a4fe7 + 6069add
commit e96f7c8
Show file tree

Hide file tree

Showing 19 changed files with 765 additions and 172 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mortAAR
 Type: Package
 Title: Analysis of Archaeological Mortality Data
-Version: 1.1.6
+Version: 1.1.7
 Authors@R: c(
   person("Nils", "Mueller-Scheessel", email = "[email protected]", role = c("aut", "cre", "cph")), 
   person("Martin", "Hinz", email = "[email protected]", role = c("aut")),
@@ -19,7 +19,7 @@ Description: A collection of functions for the analysis of archaeological mortal
     <https://books.google.de/books?id=nG5FoO_becAC&lpg=PA27&ots=LG0b_xrx6O&dq=life%20table%20archaeology&pg=PA27#v=onepage&q&f=false>). 
     It takes demographic data in different formats and displays the result in a standard life table
     as well as plots the relevant indices (percentage of deaths, survivorship, probability of death, life expectancy, percentage of population).
-Date: 2024-03-06
+Date: 2024-11-28
 License: GPL-3 | file LICENSE
 Encoding: UTF-8
 LazyData: true
@@ -29,7 +29,9 @@ Imports:
     reshape2 (>= 1.4.2),
     methods (>= 3.3.3),
     tibble (>= 3.0.3),
-    rlang (>= 1.1.1)
+    rlang (>= 1.1.1),
+    flexsurv (>= 2.2.2),
+    stats (>= 4.3.1)
 RoxygenNote: 7.2.3
 Suggests:
     testthat,

diff --git a/NAMESPACE b/NAMESPACE
@@ -30,6 +30,7 @@ S3method(print,mortaar_life_table)
 S3method(print,mortaar_life_table_list)
 export(as.mortaar_life_table)
 export(as.mortaar_life_table_list)
+export(halley.band)
 export(is.mortaar_life_table)
 export(is.mortaar_life_table_list)
 export(life.table)
@@ -39,7 +40,10 @@ export(lt.population_size)
 export(lt.representativity)
 export(lt.reproduction)
 export(lt.sexrelation)
+export(pop.sim.gomp)
 export(prep.life.table)
+export(random.cat)
+export(random.cat.apply)
 importFrom(Rdpack,reprompt)
 importFrom(graphics,axis)
 importFrom(graphics,grid)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# mortAAR 1.1.7
+- added function for Halley bands
+- added function for population simulation
+- added helper functions for the random generating and applying of age categories
+- added reprasentativity check via Total fertility rate
+
 # mortAAR 1.1.6
 - fixed the error in plotting that occurred after the fix of "aes_string" of ggplot2
   and that still persisted

diff --git a/R/halley_band.R b/R/halley_band.R
@@ -0,0 +1,143 @@
+#' Halley band of the mortality profile of a skeletal population
+#'
+#' In a series of papers, M. A. Luy and U. Wittwer-Backofen \emph{(2005; 2008)}
+#' proposed a method they called 'Halley band' as alternative for other
+#' methods of sampling from an skeletal population. It basically involves
+#' sampling n times from the age-estimation of each individual and then
+#' only taking the 2.5th and 97.5th percentile into account. The space
+#' between, they dubbed 'Halley band' but pointed out that it
+#' is not to be confused with confidence intervals.
+#'
+#' @param x a data.frame with individuals and age estimations.
+#'
+#' @param n number of runs, default: 1000.
+#'
+#' @param uncert level of uncertainty, default: 0.95.
+#'
+#' @param agebeg numeric. Starting age of the respective individual.
+#'
+#' @param ageend numeric. Closing age of the respective individual.
+#'
+#' @param agerange character. Determination if the closing
+#' age leaves a gap to the following age category. If yes (= "excluded"),
+#' "1" is added to avoid gaps, default: "excluded".
+#'
+#' @return
+#' One data.frame with the following items:
+#'
+#' \itemize{
+#'   \item \bold{age}:  age in years.
+#'   \item \bold{lower_dx}: Lower boundary of uncertainty for dx.
+#'   \item \bold{upper_dx}: Upper boundary of uncertainty for dx.
+#'   \item \bold{lower_qx}: Lower boundary of uncertainty for qx.
+#'   \item \bold{upper_qx}: Upper boundary of uncertainty for qx.
+#'   \item \bold{lower_lx}: Lower boundary of uncertainty for lx.
+#'   \item \bold{upper_lx}: Upper boundary of uncertainty for lx.
+#'  }
+#'
+#' @references
+#'
+#' \insertRef{Luy_Wittwer-Backofen_2005}{mortAAR}
+#'
+#' \insertRef{Luy_Wittwer-Backofen_2008}{mortAAR}
+#'
+#' @examples
+#'
+#'# create simulated population with artifical coarsening first
+#' pop_sim <- pop.sim.gomp(n = 1000)
+#' sim_ranges <- random.cat()
+#'
+#' # apply random age categories to simulated ages
+#' sim_appl <- random.cat.apply(pop_sim$result, age = "age",
+#' age_ranges = sim_ranges, from = "from", to = "to")
+#'
+#' # create halley bands
+#' demo <- halley.band(sim_appl, n = 1000, uncert = 0.95, agebeg = "from",
+#' ageend = "to", agerange = "excluded")
+#'
+#' # plot band with ggplot
+#' library(ggplot2)
+#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_dx, ymax = upper_dx),
+#' linetype = 0, fill = "grey")
+#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_lx, ymax = upper_lx),
+#' linetype = 0, fill = "grey")
+#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_qx, ymax = upper_qx),
+#' linetype = 0, fill = "grey")
+
+#' @rdname halley.band
+#' @export
+halley.band <- function(x, n = 1000, uncert = 0.95, agebeg, ageend,
+                        agerange = "excluded") {
+  asd <- data.frame(x)
+
+  # Change the names of agebeg and ageend for further processes to "beg" and "ende".
+  names(asd)[which(names(asd)==agebeg)] <- "beg"
+  names(asd)[which(names(asd)==ageend)] <- "ende"
+
+  # Defines if the max of the age ranges is inclusive or exclusive.
+  if(agerange == "excluded"){
+    asd$ende = asd$ende + 1
+  }
+
+  low_q <- ( 1 - uncert ) / 2
+  up_q <- 1 - ( 1 - uncert ) / 2
+
+  demo_sim_list <- list()
+  for (i in 1:n) {
+    # Create the demo_sim_sim data frame
+    demo_sim_sim <- data.frame(ind = 1:nrow(asd))
+    demo_sim_sim$age <- round(stats::runif(nrow(asd), min = asd$beg, max = asd$ende))
+
+    # Create necdf
+    age_counts <- table(demo_sim_sim$age)
+    necdf <- data.frame(
+      a = 1,
+      age = as.numeric(names(age_counts)),
+      Dx = as.numeric(age_counts)
+    )
+
+    # Add computed columns to necdf
+    necdf$dx <- necdf$Dx / sum(necdf$Dx) * 100
+    necdf$lx <- c(100, 100 - cumsum(necdf$dx))[1:nrow(necdf)]
+    necdf$qx <- necdf$dx / necdf$lx * 100
+    necdf$Ax <- necdf$a / 2
+    necdf$Lx <- necdf$a * necdf$lx - ((necdf$a - necdf$Ax) * necdf$dx)
+
+    # Store necdf in the list
+    demo_sim_list[[i]] <- necdf
+  }
+
+  # Combine all data frames in the list into one
+  demo_sim_all <- do.call(rbind, demo_sim_list)
+
+  # Prepare for summarization
+  output <- data.frame()
+
+  # Loop through unique ages to calculate quantiles
+  unique_ages <- sort(unique(demo_sim_all$age))
+  for (age in unique_ages) {
+    # Subset data for the current age
+    age_data <- demo_sim_all[demo_sim_all$age == age, ]
+
+    # Calculate quantiles
+    lower_dx <- stats::quantile(age_data$dx, probs = low_q)
+    upper_dx <- stats::quantile(age_data$dx, probs = up_q)
+    lower_qx <- stats::quantile(age_data$qx, probs = low_q)
+    upper_qx <- stats::quantile(age_data$qx, probs = up_q)
+    lower_lx <- stats::quantile(age_data$lx, probs = low_q)
+    upper_lx <- stats::quantile(age_data$lx, probs = up_q)
+
+    # Append the results to the output
+    output <- rbind(output, data.frame(
+      age = age,
+      lower_dx = lower_dx,
+      upper_dx = upper_dx,
+      lower_qx = lower_qx,
+      upper_qx = upper_qx,
+      lower_lx = lower_lx,
+      upper_lx = upper_lx
+    ))
+  }
+
+  return(output)
+}
diff --git a/R/lifetable_indices.R b/R/lifetable_indices.R
@@ -21,6 +21,8 @@
 #'   \item \bold{D0_14_D}:   proportion of individuals aged 0--14
 #'   according to \emph{McFadden & Oxenham 2018a} if infants are represented
 #'   well.
+#'   \item \bold{D15_49_D15plus}:   proportion of individuals aged 15--49
+#'   according to \emph{Taylor & Oxenham 2024}.
 #'   \item \bold{e0}:   life expectancy at age 0.
 #'}
 #'
@@ -34,6 +36,8 @@
 #'
 #' \insertRef{mcfadden_oxenham_2018a}{mortAAR}
 #'
+#' \insertRef{taylor_oxenham_2024}{mortAAR}
+#'
 #' @examples
 #' schleswig <- life.table(schleswig_ma[c("a", "Dx")])
 #' lt.indices(schleswig)
@@ -63,6 +67,8 @@ lt.indices.mortaar_life_table_list <- function(life_table) {
 #' @noRd
 lt.indices.mortaar_life_table <- function(life_table) {
 
+  # please note that "all_age" denotes the upper limit of the age categories,
+  # so the queries for the indices are counterintuitive.
   all_age <- life_table$a %>% cumsum
 
   # Children index according to Masset and Bocquet 1977
@@ -75,7 +81,7 @@ lt.indices.mortaar_life_table <- function(life_table) {
   d20plus <- life_table$Dx[all_age > 20] %>% sum
   d5_14_d20plus <- d5_14 / d20plus
 
-    # Senility index according to Masset and Bocquet 1977
+  # Senility index according to Masset and Bocquet 1977
   d60plus <- life_table$Dx[all_age > 60] %>% sum
   d60_d20plus <- d60plus / d20plus
 
@@ -94,6 +100,11 @@ lt.indices.mortaar_life_table <- function(life_table) {
   d0plus <- life_table$Dx %>% sum
   D0_14_D <- d0_14 / d0plus
 
+  # D15_49_D15plus index according to Taylor and Oxenham 2024
+  d15_49 <- life_table$Dx[all_age >= 20 & all_age <= 50] %>% sum
+  d15plus <- life_table$Dx[all_age >= 20] %>% sum
+  D15_49_D15plus <- d15_49 / d15plus
+
   # Life expectancy at age 0
   e0 <- life_table$ex[[1]]
 
@@ -103,7 +114,8 @@ lt.indices.mortaar_life_table <- function(life_table) {
     juvenile_i = d5_14_d20plus, d5_14 = d5_14, d20plus = d20plus,
     senility_i = d60_d20plus, d0plus= d0plus, d60plus = d60plus,
     p5_19 = p5_19,D30_D5 = D30_D5,
-     D0_14_D = D0_14_D, d0_14 = d0_14,
+    D0_14_D = D0_14_D, d0_14 = d0_14,
+    D15_49_D15plus = D15_49_D15plus,
     e0 = e0
   )
 
@@ -131,6 +143,8 @@ lt.indices.mortaar_life_table <- function(life_table) {
 #' @keywords internal
 lt.mortality <- function(life_table) {
 
+  indx <- lt.indices(life_table)
+
   all_age <- life_table$a %>% cumsum
 
   # Indices for representativity after Weiss 1973 and Model life tables
@@ -149,6 +163,15 @@ lt.mortality <- function(life_table) {
   lx10 <- life_table$lx[all_age == 15]
   q15_45 <- d15_45 / lx10 * 100
 
-  result_list <- list(q0_5 = q0_5, q10_5 = q10_5, q15_5 = q15_5, q15_45 = q15_45)
+  # Total fertility rate from subadults and adults
+  TFR_subadult <-  indx$D0_14_D * 7.210 + 2.381
+  TFR_adult <- indx$D15_49_D15plus * 8.569 + 2.578
+
+  result_list <- list(q0_5 = q0_5,
+                      q10_5 = q10_5,
+                      q15_5 = q15_5,
+                      q15_45 = q15_45,
+                      TFR_subadult = TFR_subadult,
+                      TFR_adult = TFR_adult)
   result_list
 }
diff --git a/R/population_simulation.R b/R/population_simulation.R
@@ -0,0 +1,92 @@
+#' Simulation of a population of adults with Gompertz distribution
+#'
+#' In many instances, it is useful to calculate with a population with
+#' known parameters. To generate a population with realistic
+#' characteristics is less obvious than it seems. We operate here
+#' with the Gompertz distribution which provides a reasonable
+#' approximation of human mortality for \bold{adult} mortality, that is
+#' for the ages >= 15 years. The user has to specify
+#' either the parameter b or the modal age M. The modal age M is
+#' particular useful as it provides an intuitive understanding of
+#' the resulting age distribution. In both instances, the second
+#' parameter a is generated by the regression formula found by
+#' \emph{Sasaki and Kondo 2016}. If neither is given, a population
+#' with random parameters realistic for pre-modern times is generated.
+#'
+#' @param n number of individuals to be simulated.
+#'
+#' @param b numeric, optional. Gompertz parameter controlling the
+#' level of mortality.
+#'
+#' @param M numeric, optional. Modal age M.
+#'
+#' @param start_age numeric. Start age, default: 15 years.
+#'
+#' @param max_age numeric. Maximal age, to avoid unlikely centenaries,
+#' default: 100 years.
+#'
+#' @return
+#' A list of two data.frames with the following items:
+#'
+#' First data.frame
+#' \itemize{
+#'   \item \bold{N}:  Number of individuals.
+#'   \item \bold{b}:  Gompertz parameter controlling mortality.
+#'   \item \bold{M}:  Modal age.
+#'   \item \bold{a}:  Gompertz parameter controlling hazard of the
+#'   youngest age group.
+#'  }
+#'
+#'Second data.frame
+#' \itemize{
+#'   \item \bold{ind}:  ID of individuals.
+#'   \item \bold{age}:  Simulated absolute age.
+#'  }
+#'
+#' @references
+#'
+#' \insertRef{sasaki_kondo_2016}{mortAAR}
+#'
+#' @examples
+#'
+#' pop_sim <- pop.sim.gomp(n = 10000, M = 35)
+#' pop_sim <- pop.sim.gomp(n = 10000, b = 0.03)
+#' pop_sim <- pop.sim.gomp(n = 10000)
+
+#' @rdname pop.sim.gomp
+#' @export
+pop.sim.gomp <- function(n, b = NULL, M = NULL, start_age = 15, max_age = 100) {
+  if ( length(M) > 0) {
+    M_1 <- 0
+    M_2 <- 0
+    while ( M < M_1 | M > M_2 ) {
+      b <- stats::runif(n = 1, min = 0.02, max = 0.1)
+      a <- exp(stats::rnorm(1, (-66.77 * (b - 0.0718) - 7.119), 0.0823))
+      M_ <- 1 / b * log (b/a) + start_age
+      M_1 <- M_ - 0.001
+      M_2 <- M_ + 0.001
+    }
+  } else if (length(b) > 0) {
+    a <- exp(stats::rnorm(1, (-66.77 * (b - 0.0718) - 7.119), sqrt(0.0823) ) )
+    M <- 1 / b * log (b/a) + start_age
+  } else {
+    b <- stats::runif(n = 1, min = 0.02, max = 0.05)
+    a <- exp(stats::rnorm(1, (-66.77 * (b - 0.0718) - 7.119), sqrt(0.0823) ) )
+    M <- 1 / b * log (b/a) + start_age
+  }
+  count = 1
+  lt_result <- matrix(NA, n, 2)
+  while (count < (n + 1)) {
+    age <- round(flexsurv::rgompertz(1, b, a) ) + start_age
+    if (age < max_age) {
+    lt_result[count,] <- c(ind = count, age = age)
+    count <- count + 1
+    }
+  }
+  lt_result <- data.frame(lt_result)
+  colnames(lt_result) <- c("ind", "age")
+  lt_values <- data.frame(n = n, b = b, a = a, M = M, start_age = start_age,
+                          max_age = max_age)
+  lt_list <- list(values = lt_values, result = lt_result)
+  return(lt_list)
+}