diff --git a/Practicals/JudithNeve/BeamerExample.zip b/Practicals/JudithNeve/BeamerExample.zip new file mode 100644 index 0000000..89dba14 Binary files /dev/null and b/Practicals/JudithNeve/BeamerExample.zip differ diff --git a/Practicals/JudithNeve/BeamerExample/main.tex b/Practicals/JudithNeve/BeamerExample/main.tex new file mode 100644 index 0000000..b293358 --- /dev/null +++ b/Practicals/JudithNeve/BeamerExample/main.tex @@ -0,0 +1,97 @@ +%\documentclass{beamer} +\documentclass[aspectratio=169]{beamer} % took this from the solution +\usetheme{default} +\usecolortheme{beaver} + + +\title{Example document to recreate with \texttt{beamer} in \LaTeX} +\author{Judith Neve} +%\institute{ +% Markup Languages and Reproducible Programming in Statistics +%} +\date{\vspace{.5 in}\\ November 2022 \\ Markup Languages and Reproducible Programming in Statistics \vskip6mm} % this was taken from the solution + +\beamertemplatenavigationsymbolsempty %suppress navigation bar +% this was taken from the solution + +\begin{document} + +%\begin{frame}[plain] + \titlepage +%\end{frame} +% commenting these out made it start lower on the slide + +\begin{frame}{Outline} +%\tableofcontents +Working with equations\\ +\hspace*{20pt} Aligning the same equation\\ +\hspace*{20pt} Omit equation numbering\\ +\hspace*{20pt} Ugly alignment\\ +\vspace*{20pt} +Discussion + +\end{frame} +% could have made subsections and not given names to the frames +\begin{frame}{Working with equations} + +We define a set of equations as + +\begin{equation} + a = b + c^2 +\end{equation} +\begin{equation} + a - c^2 = b +\end{equation} +\begin{equation} + \text{left side} = \text{right side} +\end{equation} +\begin{equation} + \text{left side} + \text{something} \geq \text{right side} +\end{equation} +for all something $>$ 0. + +\end{frame} + +\begin{frame}{Aligning the same equations} + +Aligning the equations by the equal sign gives a much better view into the placement of the separate equation components. + +\begin{align} + a & = b + c^2\\ + a - c^2 & = b\\ + \text{left side} & = \text{right side}\\ + \text{left side} + \text{something} & \geq \text{right side} +\end{align} + +\end{frame} + +\begin{frame}{Omit equation numbering} + Alternatively, the equation numbering can be omitted. + \begin{align*} + a & = b + c^2\\ + a - c^2 & = b\\ + \text{left side} & = \text{right side}\\ + \text{left side} + \text{something} & \geq \text{right side} +\end{align*} +\end{frame} + +\begin{frame}{Ugly alignment} + Some components do not look well, when aligned. Especially equations with different heights and spacing. For example, +\begin{align} + E = mc^2\\ + m = \frac{E}{c^2}\\ + c = \sqrt{\frac{E}{M}} +\end{align} +Take that into account. +\end{frame} + +\begin{frame}{Discussion} + This is where you'd normally give your audience a recap of your talk, where you could discuss e.g. 
the following\begin{itemize} + \item Your main findings + \item The consequences of your main findings + \item Things to do + \item Any other business not currently investigated, but related to your talk + \end{itemize} +\end{frame} + +\end{document} \ No newline at end of file
diff --git a/Practicals/JudithNeve/JudithNeve.Rproj b/Practicals/JudithNeve/JudithNeve.Rproj new file mode 100644 index 0000000..8f7268f --- /dev/null +++ b/Practicals/JudithNeve/JudithNeve.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: ISO8859-1 + +RnwWeave: Sweave +LaTeX: pdfLaTeX
diff --git a/Practicals/JudithNeve/Markup_A1.Rmd b/Practicals/JudithNeve/Markup_A1.Rmd new file mode 100644 index 0000000..1623ddc --- /dev/null +++ b/Practicals/JudithNeve/Markup_A1.Rmd @@ -0,0 +1,54 @@ +--- +title: "Markup Assignment 1" +author: "Judith Neve" +date: '2022-09-19' +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +```{r} +# draw 100 samples of size n = 5000 from the standard normal distribution +set.seed(0070661) +n <- 5000 +sim_dat <- matrix(NA, n, 100) +for (i in 1:100) { + sim_dat[,i] <- rnorm(n) +} +``` + +```{r} +sim_stats <- matrix(NA, 100, 4) +colnames(sim_stats) <- c("AbsoluteBias", "StandardError", "LowerBound", "UpperBound") + +# for each sample, calculate: +for (i in 1:100) { + sim_stats[i,1:2] <- c(mean(sim_dat[,i]), # absolute bias (the mean was 0 so the mean of the sample works) + 1/sqrt(n)) # standard error + sim_stats[i,3:4] <- c(sim_stats[i,1] - qt(.975, n-1)*sim_stats[i,2], # lower bound of the CI + sim_stats[i,1] + qt(.975, n-1)*sim_stats[i,2]) # upper bound of the CI +} +``` + +```{r} +# create a plot that demonstrates the following: +## “A replication of the procedure that generates a 95% confidence interval that is centered around the sample mean would cover the population value at least 95 out of 100 times” (Neyman, 1934) + +library(tidyverse) + +sim_stats <- sim_stats %>% + as.data.frame() %>% + mutate(MeanInInt = ifelse(0 > LowerBound & 0 < UpperBound, TRUE, FALSE)) + +sim_stats %>% + ggplot(aes(x = 1:100, y = AbsoluteBias)) + + geom_pointrange(aes(ymin = LowerBound, ymax = UpperBound, col = MeanInInt)) +``` + +```{r} +# Present a table containing all simulated samples for which the resulting confidence interval does not contain the population value +othersamples <- sim_dat[,sim_stats$MeanInInt == FALSE] +``` + 
diff --git a/Practicals/JudithNeve/Markup_A2.Rmd b/Practicals/JudithNeve/Markup_A2.Rmd new file mode 100644 index 0000000..b0fa30b --- /dev/null +++ b/Practicals/JudithNeve/Markup_A2.Rmd @@ -0,0 +1,91 @@ +--- +title: "Practical 2" +author: "Judith Neve" +date: '2022-10-06' +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +# Introduction + +## Aim + +In this study, I aim to empirically show the proportion of observations within 1, 2, and 3 standard deviations of the mean in a normal distribution. These proportions are expected to be approximately 68%, 95%, and 99.7%, respectively. + +# Methods + +## Data-generating mechanism + +The `rnorm()` function is used to generate observations from a normal distribution. We generate three vectors with 1000 observations, each following a different distribution: + +* A standard normal distribution, +* A normal distribution with mean 100 and standard deviation 50, +* A normal distribution with mean 10 and standard +deviation 1.
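As a reference point for the proportions stated under Aim, the theoretical values can be computed directly from the normal CDF. The short chunk below is an illustrative addition by the editor (not part of the original submission); it uses only base R's `pnorm()`, and the simulated proportions in the Results section should fluctuate around these values.

```{r}
# Theoretical benchmark: P(|X - mu| <= k * sigma) is the same for every
# normal distribution, so one value per k is enough.
sapply(1:3, function(k) pnorm(k) - pnorm(-k))
# approximately 0.683, 0.954, 0.997
```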
+ +```{r} +set.seed(9) # the number of books on my bedside table + +StandardNormal <- rnorm(1000) +Mean100SD50 <- rnorm(1000, 100, 50) +Mean10SD1 <- rnorm(1000, 10, 1) +``` + +## Estimand + +The estimand is the proportion of observations within 1, 2, or 3 standard deviations of the mean; the estimator is the corresponding empirical proportion in each simulated vector, giving 9 estimates in total. The estimates are computed with the function in the chunk below. + +```{r} +propwithinSD <- function(vector, mean = 0, SD = 1, within = 1) { +  vector <- abs(vector - mean) # we calculate the absolute difference between each observation and the mean +  SD <- SD*within # we set how far we can stray from the mean +  in_vector <- ifelse(vector > SD, 0, 1) # this will give a vector where 1 is an observation within the desired number of SDs +  prop <- sum(in_vector) / length(in_vector) +  return(prop) +} +``` + +## Method + +I am trying to follow the ADEMP approach, but I am not sure this step applies to this study; it would be more relevant if different data-generating functions were being compared. + +## Performance measures + +I am trying to follow the ADEMP approach, but I am not sure this step applies to this study either; it would become relevant if different data-generating functions were being compared and assessed on how closely they approximate the normal distribution. + +# Results + +We first look at the proportion of observations within 1 SD for each vector. This should be around 68%. + +```{r} +propwithinSD(StandardNormal, 0, 1, 1) +propwithinSD(Mean100SD50, 100, 50, 1) +propwithinSD(Mean10SD1, 10, 1, 1) +``` + +We are indeed around 68%. + +Looking now at the proportion of observations within 2 SD for each vector, + +```{r} +propwithinSD(StandardNormal, 0, 1, 2) +propwithinSD(Mean100SD50, 100, 50, 2) +propwithinSD(Mean10SD1, 10, 1, 2) +``` +we find they are all close to 95%, as expected. + +Finally, looking at the proportion of observations within 3 SD for each vector, + +```{r} +propwithinSD(StandardNormal, 0, 1, 3) +propwithinSD(Mean100SD50, 100, 50, 3) +propwithinSD(Mean10SD1, 10, 1, 3) +``` + +we observe they are all close to 99.7%, as expected. + +```{r} +sessionInfo() +``` + 
diff --git a/Practicals/JudithNeve/Markup_A2.html b/Practicals/JudithNeve/Markup_A2.html new file mode 100644 index 0000000..d082757 --- /dev/null +++ b/Practicals/JudithNeve/Markup_A2.html @@ -0,0 +1,521 @@
[Rendered HTML output of Markup_A2.Rmd (title: "Practical 2"). The body repeats the Introduction, Methods, and Results text above, followed by the knitted code output: proportions within 1 SD of 0.701, 0.689, and 0.662; within 2 SD of 0.962, 0.957, and 0.958; within 3 SD of 0.999, 0.997, and 0.995; and the sessionInfo() listing (R 4.1.2 on Windows 10 x64, French_France locale). The surrounding HTML markup is omitted here.]
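Looking back at the Results section of Markup_A2.Rmd, one optional refinement, offered here as a sketch rather than a change to the submitted files: the nine estimates could be collected in a single table, which makes the comparison with the 68%/95%/99.7% benchmarks immediate. The sketch assumes the `propwithinSD()` function and the three simulated vectors defined in the Rmd above, and uses `knitr::kable()` (knitr is already required to render the report).

```{r}
# Illustrative sketch (not in the original files): all nine proportions at once
props <- sapply(1:3, function(k) c(
  StandardNormal = propwithinSD(StandardNormal, 0, 1, k),
  Mean100SD50    = propwithinSD(Mean100SD50, 100, 50, k),
  Mean10SD1      = propwithinSD(Mean10SD1, 10, 1, k)
))
colnames(props) <- paste("within", 1:3, "SD")
knitr::kable(props, digits = 3)
```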
+ + + + + + + + + + + + + + + diff --git a/Practicals/JudithNeve/Placeholder.txt b/Practicals/JudithNeve/Placeholder.txt new file mode 100644 index 0000000..e69de29 diff --git a/Practicals/JudithNeve/Practical 8/Practical 8.Rproj b/Practicals/JudithNeve/Practical 8/Practical 8.Rproj new file mode 100644 index 0000000..8f7268f --- /dev/null +++ b/Practicals/JudithNeve/Practical 8/Practical 8.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: ISO8859-1 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/Practicals/JudithNeve/Practical 8/app.R b/Practicals/JudithNeve/Practical 8/app.R new file mode 100644 index 0000000..5a54c20 --- /dev/null +++ b/Practicals/JudithNeve/Practical 8/app.R @@ -0,0 +1,134 @@ +library(shiny) +library(tidyverse) + +# Define UI for app that draws a histogram ---- +ui <- fluidPage( + + # App title ---- + titlePanel("How do different hyperparameter combinations impact model predictive performance?"), + + # Sidebar layout with input and output definitions + sidebarLayout( + sidebarPanel = sidebarPanel( + checkboxGroupInput( + inputId = "measure", + label = "Performance measure", + choices = list( + "Discrimination (AUC)" = "AUC", + "Calibration (slope)" = "CalibrationSlope", + "Calibration (intercept)" = "CalibrationIntercept", + "Brier score" = "BrierScore", + "Logarithmic loss" = "LogarithmicLoss", + "Accuracy" = "Accuracy", + "Cohen's Kappa" = "CohensKappa" + ), + selected = "CalibrationSlope"), + radioButtons( + inputId = "unit", + label = "Unit of time", + choices = list( + "Seconds" = "seconds", + "Minutes" = "minutes", + "Hours" = "hours")) + + ), + + mainPanel( + plotOutput(outputId = "perfplot") + ) + ) +) + +server <- function(input, output) { + load("appdata.RData") + dat$`Tuned hyperparameters` <- ifelse(dat$`Tuned hyperparameters` == "", "None", dat$`Tuned hyperparameters`) + df_scale_setter <- tibble( + Accuracy = rep(c(0, 1), each = 9), + AUC = rep(c(0.5, 1), each = 9), + BrierScore = rep(c(0, 1), each = 9), + CalibrationIntercept = rep(c(-1, 1), each = 9), + CalibrationSlope = rep(c(0, 2), each = 9), + CohensKappa = rep(c(0, 1), each = 9), + LogarithmicLoss = rep(c(0, 1), each = 9), + Runtime = 0, + `Tuned hyperparameters` = rep(unique(dat$`Tuned hyperparameters`), 2) + ) %>% + pivot_longer(Accuracy:LogarithmicLoss, + names_to = "Metric", + values_to = "Performance") %>% + mutate(Metric = factor(Metric, levels = c( + "Accuracy", "AUC", "CalibrationSlope", "BrierScore", + "CalibrationIntercept", "LogarithmicLoss", "CohensKappa"))) + + Metric.labs <- c("Calibration slope", "Calibration intercept", "Brier score", + "Logarithmic loss", "Cohen's kappa", "AUC", "Classification accuracy") + names(Metric.labs) <- c("CalibrationSlope", "CalibrationIntercept", "BrierScore", + "LogarithmicLoss", "CohensKappa", "AUC", "Accuracy") + # change labels so the plot looks nice + HP.labs <- unique(dat$`Tuned hyperparameters`) + names(HP.labs) <- HP.labs + names(HP.labs)[2] <- "mtry + sample.fraction + replace\n+ min.node.size + splitrule" + HP.labs <- c(HP.labs[1], sort(HP.labs[2:9])) + # define the colour palette + HPcomb_pal <- viridis::turbo(n = 9) + df_targets <- tibble( + Accuracy = 1, + AUC = 1, + BrierScore = 0, + CalibrationIntercept = 0, + CalibrationSlope = 1, + CohensKappa = 1, + LogarithmicLoss = 0, + Runtime = 0, + `Tuned hyperparameters` = unique(dat$`Tuned hyperparameters`) + ) %>% + 
pivot_longer(Accuracy:LogarithmicLoss, + names_to = "Metric", + values_to = "Target") %>% + mutate(Metric = factor(Metric, levels = c("Accuracy", "AUC", "CalibrationSlope", + "BrierScore", "CalibrationIntercept", "LogarithmicLoss", + "CohensKappa"))) + + output$perfplot <- renderPlot({ + + dat %>% + mutate(Runtime = ifelse(rep(input$unit == "minutes", nrow(dat)), Runtime, + ifelse(rep(input$unit == "hours", nrow(dat)), Runtime/60, + Runtime*60))) %>% + pivot_longer(AUC:CohensKappa, + names_to = "Metric", + values_to = "Performance") %>% + mutate(Metric = factor(Metric, levels = c("AUC", "CalibrationSlope", "BrierScore", + "CalibrationIntercept", "Accuracy", "LogarithmicLoss", + "CohensKappa"))) %>% + filter(Metric %in% input$measure) %>% + ggplot(aes(x = Runtime, + y = Performance, + group = `Tuned hyperparameters`)) + + geom_point(aes(col = `Tuned hyperparameters`, + shape = `Tuned hyperparameters`)) + + geom_point(data = df_scale_setter %>% + filter(Metric %in% input$measure), + alpha = 0) + + geom_hline(data = df_targets %>% + filter(Metric %in% input$measure), + aes(yintercept = Target), + col = "red", lty = "dotted") + + facet_wrap(~ Metric, + scales = "free_y", + labeller = labeller(Metric = Metric.labs)) + + theme_classic() + + scale_color_manual(breaks = HP.labs, + labels = names(HP.labs), + values = HPcomb_pal) + + scale_shape_manual(breaks = HP.labs, + labels = names(HP.labs), + values = c(1, rep(16, 8))) + + labs(x = paste0("Runtime (", input$unit, ")")) + + }) + +} + + +shinyApp(ui = ui, server = server) \ No newline at end of file diff --git a/Practicals/JudithNeve/Practical 8/appdata.RData b/Practicals/JudithNeve/Practical 8/appdata.RData new file mode 100644 index 0000000..1440542 Binary files /dev/null and b/Practicals/JudithNeve/Practical 8/appdata.RData differ diff --git a/Practicals/JudithNeve/Week5/Thesis.bib b/Practicals/JudithNeve/Week5/Thesis.bib new file mode 100644 index 0000000..cc94926 --- /dev/null +++ b/Practicals/JudithNeve/Week5/Thesis.bib @@ -0,0 +1,848 @@ + +@article{van_calster_calibration_2019, + title = {Calibration: the {Achilles} heel of predictive analytics}, + volume = {17}, + issn = {1741-7015}, + shorttitle = {Calibration}, + url = {https://doi.org/10.1186/s12916-019-1466-7}, + doi = {10.1186/s12916-019-1466-7}, + abstract = {The assessment of calibration performance of risk prediction models based on regression or more flexible machine learning algorithms receives little attention.}, + language = {en}, + number = {1}, + urldate = {2022-09-05}, + journal = {BMC Medicine}, + author = {Van Calster, Ben and McLernon, David J. and van Smeden, Maarten and Wynants, Laure and Steyerberg, Ewout W. and Bossuyt, Patrick and Collins, Gary S. and Macaskill, Petra and McLernon, David J. and Moons, Karel G. M. and Steyerberg, Ewout W. and Van Calster, Ben and van Smeden, Maarten and Vickers, Andrew J. and {On behalf of Topic Group ‘Evaluating diagnostic tests and prediction models’ of the STRATOS initiative}}, + month = dec, + year = {2019}, + keywords = {Calibration, Heterogeneity, Model performance, Overfitting, Predictive analytics, Risk prediction models}, + pages = {230}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\U6Q7MIZV\\Van Calster et al. 
- 2019 - Calibration the Achilles heel of predictive analy.pdf:application/pdf}, +} + +@article{van_smeden_sample_2019, + title = {Sample size for binary logistic prediction models: {Beyond} events per variable criteria}, + volume = {28}, + issn = {0962-2802}, + shorttitle = {Sample size for binary logistic prediction models}, + url = {https://doi.org/10.1177/0962280218784726}, + doi = {10.1177/0962280218784726}, + abstract = {Binary logistic regression is one of the most frequently applied statistical approaches for developing clinical prediction models. Developers of such models often rely on an Events Per Variable criterion (EPV), notably EPV ≥10, to determine the minimal sample size required and the maximum number of candidate predictors that can be examined. We present an extensive simulation study in which we studied the influence of EPV, events fraction, number of candidate predictors, the correlations and distributions of candidate predictor variables, area under the ROC curve, and predictor effects on out-of-sample predictive performance of prediction models. The out-of-sample performance (calibration, discrimination and probability prediction error) of developed prediction models was studied before and after regression shrinkage and variable selection. The results indicate that EPV does not have a strong relation with metrics of predictive performance, and is not an appropriate criterion for (binary) prediction model development studies. We show that out-of-sample predictive performance can better be approximated by considering the number of predictors, the total sample size and the events fraction. We propose that the development of new sample size criteria for prediction models should be based on these three parameters, and provide suggestions for improving sample size determination.}, + language = {en}, + number = {8}, + urldate = {2022-09-05}, + journal = {Statistical Methods in Medical Research}, + author = {van Smeden, Maarten and Moons, Karel GM and de Groot, Joris AH and Collins, Gary S and Altman, Douglas G and Eijkemans, Marinus JC and Reitsma, Johannes B}, + month = aug, + year = {2019}, + note = {Publisher: SAGE Publications Ltd STM}, + keywords = {sample size, EPV, Logistic regression, prediction models, predictive performance, simulations}, + pages = {2455--2474}, + file = {SAGE PDF Full Text:C\:\\Users\\ditdi\\Zotero\\storage\\GYDBKBVA\\van Smeden et al. - 2019 - Sample size for binary logistic prediction models.pdf:application/pdf}, +} + +@misc{van_calster_variability_2019, + title = {On the variability of regression shrinkage methods for clinical prediction models: simulation study on predictive performance}, + shorttitle = {On the variability of regression shrinkage methods for clinical prediction models}, + url = {http://arxiv.org/abs/1907.11493}, + doi = {10.48550/arXiv.1907.11493}, + abstract = {When developing risk prediction models, shrinkage methods are recommended, especially when the sample size is limited. Several earlier studies have shown that the shrinkage of model coefficients can reduce overfitting of the prediction model and subsequently result in better predictive performance on average. In this simulation study, we aimed to investigate the variability of regression shrinkage on predictive performance for a binary outcome, with focus on the calibration slope. The slope indicates whether risk predictions are too extreme (slope {\textless} 1) or not extreme enough (slope {\textgreater} 1). 
We investigated the following shrinkage methods in comparison to standard maximum likelihood estimation: uniform shrinkage (likelihood-based and bootstrap-based), ridge regression, penalized maximum likelihood, LASSO regression, adaptive LASSO, non-negative garrote, and Firth's correction. There were three main findings. First, shrinkage improved calibration slopes on average. Second, the between-sample variability of calibration slopes was often increased relative to maximum likelihood. Among the shrinkage methods, the bootstrap-based uniform shrinkage worked well overall. In contrast to other shrinkage approaches, Firth's correction had only a small shrinkage effect but did so with low variability. Third, the correlation between the estimated shrinkage and the optimal shrinkage to remove overfitting was typically negative. Hence, although shrinkage improved predictions on average, it often worked poorly in individual datasets, in particular when shrinkage was most needed. The observed variability of shrinkage methods implies that these methods do not solve problems associated with small sample size or low number of events per variable.}, + urldate = {2022-09-05}, + publisher = {arXiv}, + author = {Van Calster, Ben and van Smeden, Maarten and Steyerberg, Ewout W.}, + month = jul, + year = {2019}, + note = {arXiv:1907.11493 [stat]}, + keywords = {62J07, Statistics - Methodology}, + file = {arXiv Fulltext PDF:C\:\\Users\\ditdi\\Zotero\\storage\\JIRF9ZQ4\\Van Calster et al. - 2019 - On the variability of regression shrinkage methods.pdf:application/pdf;arXiv.org Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\6A928ZKX\\1907.html:text/html}, +} + +@article{morris_using_2019, + title = {Using simulation studies to evaluate statistical methods}, + volume = {38}, + issn = {1097-0258}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/sim.8086}, + doi = {10.1002/sim.8086}, + abstract = {Simulation studies are computer experiments that involve creating data by pseudo-random sampling. A key strength of simulation studies is the ability to understand the behavior of statistical methods because some “truth” (usually some parameter/s of interest) is known from the process of generating the data. This allows us to consider properties of methods, such as bias. While widely used, simulation studies are often poorly designed, analyzed, and reported. This tutorial outlines the rationale for using simulation studies and offers guidance for design, execution, analysis, reporting, and presentation. In particular, this tutorial provides a structured approach for planning and reporting simulation studies, which involves defining aims, data-generating mechanisms, estimands, methods, and performance measures (“ADEMP”); coherent terminology for simulation studies; guidance on coding simulation studies; a critical discussion of key performance measures and their estimation; guidance on structuring tabular and graphical presentation of results; and new graphical presentations. With a view to describing recent practice, we review 100 articles taken from Volume 34 of Statistics in Medicine, which included at least one simulation study and identify areas for improvement.}, + language = {en}, + number = {11}, + urldate = {2022-09-05}, + journal = {Statistics in Medicine}, + author = {Morris, Tim P. and White, Ian R. 
and Crowther, Michael J.}, + year = {2019}, + keywords = {Monte Carlo, graphics for simulation, simulation design, simulation reporting, simulation studies}, + pages = {2074--2102}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\5YBUMISK\\Morris et al. - 2019 - Using simulation studies to evaluate statistical m.pdf:application/pdf}, +} + +@article{van_calster_calibration_2015, + title = {Calibration of {Risk} {Prediction} {Models}: {Impact} on {Decision}-{Analytic} {Performance}}, + volume = {35}, + issn = {0272-989X}, + shorttitle = {Calibration of {Risk} {Prediction} {Models}}, + url = {https://doi.org/10.1177/0272989X14547233}, + doi = {10.1177/0272989X14547233}, + abstract = {Decision-analytic measures to assess clinical utility of prediction models and diagnostic tests incorporate the relative clinical consequences of true and false positives without the need for external information such as monetary costs. Net Benefit is a commonly used metric that weights the relative consequences in terms of the risk threshold at which a patient would opt for treatment. Theoretical results demonstrate that clinical utility is affected by a model’;s calibration, the extent to which estimated risks correspond to observed event rates. We analyzed the effects of different types of miscalibration on Net Benefit and investigated whether and under what circumstances miscalibration can make a model clinically harmful. Clinical harm is defined as a lower Net Benefit compared with classifying all patients as positive or negative by default. We used simulated data to investigate the effect of overestimation, underestimation, overfitting (estimated risks too extreme), and underfitting (estimated risks too close to baseline risk) on Net Benefit for different choices of the risk threshold. In accordance with theory, we observed that miscalibration always reduced Net Benefit. Harm was sometimes observed when models underestimated risk at a threshold below the event rate (as in underestimation and overfitting) or overestimated risk at a threshold above event rate (as in overestimation and overfitting). Underfitting never resulted in a harmful model. The impact of miscalibration decreased with increasing discrimination. Net Benefit was less sensitive to miscalibration for risk thresholds close to the event rate than for other thresholds. We illustrate these findings with examples from the literature and with a case study on testicular cancer diagnosis. Our findings strengthen the importance of obtaining calibrated risk models.}, + language = {en}, + number = {2}, + urldate = {2022-09-08}, + journal = {Medical Decision Making}, + author = {Van Calster, Ben and Vickers, Andrew J.}, + month = feb, + year = {2015}, + note = {Publisher: SAGE Publications Inc STM}, + keywords = {discrimination, prediction models, calibration, decision curve analysis, decision-analytic measures, net benefit, risk scores}, + pages = {162--169}, + file = {Version acceptée:C\:\\Users\\ditdi\\Zotero\\storage\\2B7QCD9Z\\Van Calster et Vickers - 2015 - Calibration of Risk Prediction Models Impact on D.pdf:application/pdf}, +} + +@incollection{james_tree-based_2021, + address = {New York, NY}, + series = {Springer {Texts} in {Statistics}}, + title = {Tree-{Based} {Methods}}, + isbn = {978-1-07-161418-1}, + url = {https://doi.org/10.1007/978-1-0716-1418-1_8}, + abstract = {In this chapter, we describe tree-based methods for regression and classification. 
These involve stratifying or segmenting the predictor space into a number of simple regions. In order to make a prediction for a given observation, we typically use the mean or the mode response value for the training observations in the region to which it belongs.}, + language = {en}, + urldate = {2022-09-08}, + booktitle = {An {Introduction} to {Statistical} {Learning}: with {Applications} in {R}}, + publisher = {Springer US}, + author = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert}, + editor = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert}, + year = {2021}, + doi = {10.1007/978-1-0716-1418-1_8}, + pages = {327--365}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\36VIMYT6\\James et al. - 2021 - Tree-Based Methods.pdf:application/pdf}, +} + +@article{van_calster_calibration_2016, + title = {A calibration hierarchy for risk models was defined: from utopia to empirical data}, + volume = {74}, + issn = {0895-4356}, + shorttitle = {A calibration hierarchy for risk models was defined}, + url = {https://www.sciencedirect.com/science/article/pii/S0895435615005818}, + doi = {10.1016/j.jclinepi.2015.12.005}, + abstract = {Objective +Calibrated risk models are vital for valid decision support. We define four levels of calibration and describe implications for model development and external validation of predictions. +Study Design and Setting +We present results based on simulated data sets. +Results +A common definition of calibration is “having an event rate of R\% among patients with a predicted risk of R\%,” which we refer to as “moderate calibration.” Weaker forms of calibration only require the average predicted risk (mean calibration) or the average prediction effects (weak calibration) to be correct. “Strong calibration” requires that the event rate equals the predicted risk for every covariate pattern. This implies that the model is fully correct for the validation setting. We argue that this is unrealistic: the model type may be incorrect, the linear predictor is only asymptotically unbiased, and all nonlinear and interaction effects should be correctly modeled. In addition, we prove that moderate calibration guarantees nonharmful decision making. Finally, results indicate that a flexible assessment of calibration in small validation data sets is problematic. +Conclusion +Strong calibration is desirable for individualized decision support but unrealistic and counter productive by stimulating the development of overly complex models. Model development and external validation should focus on moderate calibration.}, + language = {en}, + urldate = {2022-09-08}, + journal = {Journal of Clinical Epidemiology}, + author = {Van Calster, Ben and Nieboer, Daan and Vergouwe, Yvonne and De Cock, Bavo and Pencina, Michael J. and Steyerberg, Ewout W.}, + month = jun, + year = {2016}, + keywords = {Calibration, Overfitting, Risk prediction models, Decision curve analysis, External validation, Loess}, + pages = {167--176}, + file = {ScienceDirect Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\PNN246WR\\S0895435615005818.html:text/html;Version acceptée:C\:\\Users\\ditdi\\Zotero\\storage\\WMEBMWU4\\Van Calster et al. 
- 2016 - A calibration hierarchy for risk models was define.pdf:application/pdf}, +} + +@article{collins_predicting_2012, + title = {Predicting the 10 year risk of cardiovascular disease in the {United} {Kingdom}: independent and external validation of an updated version of {QRISK2}}, + volume = {344}, + copyright = {© Collins et al 2012. This is an open-access article distributed under the terms of the Creative Commons Attribution Non-commercial License, which permits use, distribution, and reproduction in any medium, provided the original work is properly cited, the use is non commercial and is otherwise in compliance with the license. See: http://creativecommons.org/licenses/by-nc/2.0/ and http://creativecommons.org/licenses/by-nc/2.0/legalcode.}, + issn = {1756-1833}, + shorttitle = {Predicting the 10 year risk of cardiovascular disease in the {United} {Kingdom}}, + url = {https://www.bmj.com/content/344/bmj.e4181}, + doi = {10.1136/bmj.e4181}, + abstract = {Objective To evaluate the performance of the QRISK2-2011 score for predicting the 10 year risk of cardiovascular disease in an independent UK cohort of patients from general practice and to compare it with earlier versions of the model and a National Institute for Health and Clinical Excellence version of the Framingham equation. +Design Prospective cohort study to validate a cardiovascular risk score with routinely collected data between June 1994 and June 2008. +Setting 364 practices from the United Kingdom contributing to The Health Improvement Network (THIN) database. +Participants Two million patients aged 30 to 84 years (11.8 million person years) with 93 564 cardiovascular events. +Main outcome measure First diagnosis of cardiovascular disease (myocardial infarction, angina, coronary heart disease, stroke, and transient ischaemic attack) recorded in general practice records. +Results Results from this independent and external validation of QRISK2-2011 indicate good performance data when compared with the NICE version of the Framingham equation. QRISK2-2011 had better ability to identify those at high risk of developing cardiovascular disease than did the NICE Framingham equation. QRISK2-2011 is well calibrated, with reasonable agreement between observed and predicted outcomes, whereas the NICE Framingham equation seems to consistently over-predict risk in men by about 5\% and shows poor calibration in women. +Conclusions QRISK2-2011 seems to be a useful model, with good discriminative and calibration properties when compared with the NICE version of the Framingham equation. Furthermore, based on current high risk thresholds, concerns exist on the clinical usefulness of the NICE version of the Framingham equation for identifying women at high risk of developing cardiovascular disease. At current thresholds the NICE version of the Framingham equation has no clinical benefit in either men or women.}, + language = {en}, + urldate = {2022-09-08}, + journal = {BMJ}, + author = {Collins, Gary S. 
and Altman, Douglas G.}, + month = jun, + year = {2012}, + pmid = {22723603}, + note = {Publisher: British Medical Journal Publishing Group +Section: Research}, + pages = {e4181}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\9V6LULAC\\Collins et Altman - 2012 - Predicting the 10 year risk of cardiovascular dise.pdf:application/pdf;Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\XZ2EZWVL\\bmj.e4181.html:text/html}, +} + +@article{garcia_leiva_novel_2019, + title = {A {Novel} {Hyperparameter}-{Free} {Approach} to {Decision} {Tree} {Construction} {That} {Avoids} {Overfitting} by {Design}}, + volume = {7}, + issn = {2169-3536}, + doi = {10.1109/ACCESS.2019.2930235}, + abstract = {Decision trees are an extremely popular machine learning technique. Unfortunately, overfitting in decision trees still remains an open issue that sometimes prevents achieving good performance. In this paper, we present a novel approach for the construction of decision trees that avoids the overfitting by design, without losing accuracy. A distinctive feature of our algorithm is that it requires neither the optimization of any hyperparameters, nor the use of regularization techniques, thus significantly reducing the decision tree training time. Moreover, our algorithm produces much smaller and shallower trees than traditional algorithms, facilitating the interpretability of the resulting models. For reproducibility, we provide an open source version of the algorithm.}, + journal = {IEEE Access}, + author = {García Leiva, Rafael and Fernández Anta, Antonio and Mancuso, Vincenzo and Casari, Paolo}, + year = {2019}, + note = {Conference Name: IEEE Access}, + keywords = {Training, Complexity theory, Computational modeling, Decision trees, interpretability, Kolmogorov complexity, Optimization, Prediction algorithms, regularization, Vegetation}, + pages = {99978--99987}, + file = {IEEE Xplore Abstract Record:C\:\\Users\\ditdi\\Zotero\\storage\\C3H66GGD\\8767915.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\REYK63PW\\García Leiva et al. - 2019 - A Novel Hyperparameter-Free Approach to Decision T.pdf:application/pdf}, +} + +@inproceedings{bernard_influence_2009, + address = {Berlin, Heidelberg}, + series = {Lecture {Notes} in {Computer} {Science}}, + title = {Influence of {Hyperparameters} on {Random} {Forest} {Accuracy}}, + isbn = {978-3-642-02326-2}, + doi = {10.1007/978-3-642-02326-2_18}, + abstract = {In this paper we present our work on the Random Forest (RF) family of classification methods. Our goal is to go one step further in the understanding of RF mechanisms by studying the parametrization of the reference algorithm Forest-RI. In this algorithm, a randomization principle is used during the tree induction process, that randomly selects K features at each node, among which the best split is chosen. The strength of randomization in the tree induction is thus led by the hyperparameter K which plays an important role for building accurate RF classifiers. We have decided to focus our experimental study on this hyperparameter and on its influence on classification accuracy. For that purpose, we have evaluated the Forest-RI algorithm on several machine learning problems and with different settings of K in order to understand the way it acts on RF performance. We show that default values of K traditionally used in the literature are globally near-optimal, except for some cases for which they are all significatively sub-optimal. 
Thus additional experiments have been led on those datasets, that highlight the crucial role played by feature relevancy in finding the optimal setting of K.}, + language = {en}, + booktitle = {Multiple {Classifier} {Systems}}, + publisher = {Springer}, + author = {Bernard, Simon and Heutte, Laurent and Adam, Sébastien}, + editor = {Benediktsson, Jón Atli and Kittler, Josef and Roli, Fabio}, + year = {2009}, + keywords = {Decision Trees, Ensemble Method, Random Forests, Supervised Learning}, + pages = {171--180}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\MGQZQR65\\Bernard et al. - 2009 - Influence of Hyperparameters on Random Forest Accu.pdf:application/pdf}, +} + +@misc{mantovani_empirical_2019, + title = {An empirical study on hyperparameter tuning of decision trees}, + url = {http://arxiv.org/abs/1812.02207}, + doi = {10.48550/arXiv.1812.02207}, + abstract = {Machine learning algorithms often contain many hyperparameters whose values affect the predictive performance of the induced models in intricate ways. Due to the high number of possibilities for these hyperparameter configurations, and their complex interactions, it is common to use optimization techniques to find settings that lead to high predictive accuracy. However, we lack insight into how to efficiently explore this vast space of configurations: which are the best optimization techniques, how should we use them, and how significant is their effect on predictive or runtime performance? This paper provides a comprehensive approach for investigating the effects of hyperparameter tuning on three Decision Tree induction algorithms, CART, C4.5 and CTree. These algorithms were selected because they are based on similar principles, have presented a high predictive performance in several previous works and induce interpretable classification models. Additionally, they contain many interacting hyperparameters to be adjusted. Experiments were carried out with different tuning strategies to induce models and evaluate the relevance of hyperparameters using 94 classification datasets from OpenML. Experimental results indicate that hyperparameter tuning provides statistically significant improvements for C4.5 and CTree in only one-third of the datasets, and in most of the datasets for CART. Different tree algorithms may present different tuning scenarios, but in general, the tuning techniques required relatively few iterations to find accurate solutions. Furthermore, the best technique for all the algorithms was the Irace. Finally, we find that tuning a specific small subset of hyperparameters contributes most of the achievable optimal predictive performance.}, + urldate = {2022-09-08}, + publisher = {arXiv}, + author = {Mantovani, Rafael Gomes and Horváth, Tomáš and Cerri, Ricardo and Junior, Sylvio Barbon and Vanschoren, Joaquin and de Carvalho, André Carlos Ponce de Leon Ferreira}, + month = feb, + year = {2019}, + note = {arXiv:1812.02207 [cs, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning}, + file = {arXiv Fulltext PDF:C\:\\Users\\ditdi\\Zotero\\storage\\7P8Y73GP\\Mantovani et al. 
- 2019 - An empirical study on hyperparameter tuning of dec.pdf:application/pdf;arXiv.org Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\CPUX6726\\1812.html:text/html}, +} + +@misc{hazan_hyperparameter_2018, + title = {Hyperparameter {Optimization}: {A} {Spectral} {Approach}}, + shorttitle = {Hyperparameter {Optimization}}, + url = {http://arxiv.org/abs/1706.00764}, + doi = {10.48550/arXiv.1706.00764}, + abstract = {We give a simple, fast algorithm for hyperparameter optimization inspired by techniques from the analysis of Boolean functions. We focus on the high-dimensional regime where the canonical example is training a neural network with a large number of hyperparameters. The algorithm --- an iterative application of compressed sensing techniques for orthogonal polynomials --- requires only uniform sampling of the hyperparameters and is thus easily parallelizable. Experiments for training deep neural networks on Cifar-10 show that compared to state-of-the-art tools (e.g., Hyperband and Spearmint), our algorithm finds significantly improved solutions, in some cases better than what is attainable by hand-tuning. In terms of overall running time (i.e., time required to sample various settings of hyperparameters plus additional computation time), we are at least an order of magnitude faster than Hyperband and Bayesian Optimization. We also outperform Random Search 8x. Additionally, our method comes with provable guarantees and yields the first improvements on the sample complexity of learning decision trees in over two decades. In particular, we obtain the first quasi-polynomial time algorithm for learning noisy decision trees with polynomial sample complexity.}, + urldate = {2022-09-08}, + publisher = {arXiv}, + author = {Hazan, Elad and Klivans, Adam and Yuan, Yang}, + month = jan, + year = {2018}, + note = {arXiv:1706.00764 [cs, math, stat]}, + keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Artificial Intelligence, Mathematics - Optimization and Control}, + file = {arXiv Fulltext PDF:C\:\\Users\\ditdi\\Zotero\\storage\\YHF7YKP2\\Hazan et al. - 2018 - Hyperparameter Optimization A Spectral Approach.pdf:application/pdf;arXiv.org Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\CTJ8MY7P\\1706.html:text/html}, +} + +@article{noauthor_developing_2021, + title = {Developing a {Hyperparameter} {Tuning} {Based} {Machine} {Learning} {Approach} of {Heart} {Disease} {Prediction} {\textbar} {Journal} of {Applied} {Science} \& {Process} {Engineering}}, + url = {https://publisher.unimas.my/ojs/index.php/JASPE/article/view/2639}, + abstract = {Machine learning techniques are widely used in healthcare sectors to predict fatal diseases. The objective of this research was to develop and compare the performance of the traditional system with the proposed system that predicts the heart disease implementing the Logistic regression, K-nearest neighbor, Support vector machine, Decision tree, and Random Forest classification models. The proposed system helped to tune the hyperparameters using the grid search approach to the five mentioned classification algorithms. The performance of the heart disease prediction system is the major research issue. With the hyperparameter tuning model, it can be used to enhance the performance of the prediction models. The achievement of the traditional and proposed system was evaluated and compared in terms of accuracy, precision, recall, and F1 score. 
As the traditional system achieved accuracies between 81.97\% and 90.16\%., the proposed hyperparameter tuning model achieved accuracies in the range increased between 85.25\% and 91.80\%. These evaluations demonstrated that the proposed prediction approach is capable of achieving more accurate results compared with the traditional approach in predicting heart disease with the acquisition of feasible performance.}, + language = {en-US}, + urldate = {2022-09-08}, + month = sep, + year = {2021}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\3JDF7YEY\\2021 - Developing a Hyperparameter Tuning Based Machine L.pdf:application/pdf}, +} + +@article{muhajir_improving_2022, + series = {Sixth {Information} {Systems} {International} {Conference} ({ISICO} 2021)}, + title = {Improving classification algorithm on education dataset using hyperparameter tuning}, + volume = {197}, + issn = {1877-0509}, + url = {https://www.sciencedirect.com/science/article/pii/S1877050921023954}, + doi = {10.1016/j.procs.2021.12.171}, + abstract = {In this paper, researchers propose a classification method for any institution’s campus placement possibility using Placement Data Full Class for campus recruitment dataset. Researchers attempt to study the supervised learning classification algorithms such Logistic Regression, Support Vector Classifier (SVC), K-Nearest Neighbors (KNN), Gaussian Naive Bayes, Decision Tree, Random Forest, Gradient Boosting, and Linear Discriminant Analysis (LDA). Hyperparameter optimization also used to optimize the supervised algorithms for better results. Experimental results have found that by using hyperparameter tuning in Linear Discriminant Analysis (LDA), it can increase the accuracy performance results, and also given a better result compared to other algorithms.}, + language = {en}, + urldate = {2022-09-08}, + journal = {Procedia Computer Science}, + author = {Muhajir, Daud and Akbar, Muhammad and Bagaskara, Affindi and Vinarti, Retno}, + month = jan, + year = {2022}, + keywords = {campus recruitment, decision tree, Gaussian Naive Bayes, gradient boosting, hyperparameter optimization, K Nearest Neighbors (KNN), Linear Discriminant Analysis (LDA), logistic regression, random forest, Supervised classification, support vector classifier (SVC)}, + pages = {538--544}, + file = {ScienceDirect Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\E9BIS9SM\\Muhajir et al. - 2022 - Improving classification algorithm on education da.pdf:application/pdf;ScienceDirect Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\8WXCIBWI\\S1877050921023954.html:text/html}, +} + +@article{pratola_bayesian_2016, + title = {Bayesian {Additive} {Regression} {Tree} {Calibration} of {Complex} {High}-{Dimensional} {Computer} {Models}}, + volume = {58}, + issn = {0040-1706}, + url = {https://doi.org/10.1080/00401706.2015.1049749}, + doi = {10.1080/00401706.2015.1049749}, + abstract = {Complex natural phenomena are increasingly investigated by the use of a complex computer simulator. To leverage the advantages of simulators, observational data need to be incorporated in a probabilistic framework so that uncertainties can be quantified. A popular framework for such experiments is the statistical computer model calibration experiment. A limitation often encountered in current statistical approaches for such experiments is the difficulty in modeling high-dimensional observational datasets and simulator outputs as well as high-dimensional inputs. 
As the complexity of simulators seems to only grow, this challenge will continue unabated. In this article, we develop a Bayesian statistical calibration approach that is ideally suited for such challenging calibration problems. Our approach leverages recent ideas from Bayesian additive regression Tree models to construct a random basis representation of the simulator outputs and observational data. The approach can flexibly handle high-dimensional datasets, high-dimensional simulator inputs, and calibration parameters while quantifying important sources of uncertainty in the resulting inference. We demonstrate our methodology on a CO2 emissions rate calibration problem, and on a complex simulator of subterranean radionuclide dispersion, which simulates the spatial–temporal diffusion of radionuclides released during nuclear bomb tests at the Nevada Test Site. Supplementary computer code and datasets are available online.}, + number = {2}, + urldate = {2022-09-08}, + journal = {Technometrics}, + author = {Pratola, M. T. and Higdon, D. M.}, + month = apr, + year = {2016}, + note = {Publisher: Taylor \& Francis +\_eprint: https://doi.org/10.1080/00401706.2015.1049749}, + keywords = {Climate change, Catastrophe model, Markov chain Monte Carlo, Nonparametric, Treaty verification, Uncertainty quantification}, + pages = {166--179}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\945IE2DV\\Pratola et Higdon - 2016 - Bayesian Additive Regression Tree Calibration of C.pdf:application/pdf}, +} + +@article{hickey_dynamic_2013, + title = {Dynamic {Prediction} {Modeling} {Approaches} for {Cardiac} {Surgery}}, + volume = {6}, + url = {https://www.ahajournals.org/doi/full/10.1161/CIRCOUTCOMES.111.000012}, + doi = {10.1161/CIRCOUTCOMES.111.000012}, + abstract = {Background— + +The calibration of several cardiac clinical prediction models has deteriorated over time. We compare different model fitting approaches for in-hospital mortality after cardiac surgery that adjust for cross-sectional case mix in a heterogeneous patient population. + +Methods and Results— + +Data from {\textgreater}300 000 consecutive cardiac surgery procedures performed at all National Health Service and some private hospitals in England and Wales between April 2001 and March 2011 were extracted from the National Institute for Cardiovascular Outcomes Research clinical registry. The study outcome was in-hospital mortality. Model approaches included not updating, periodic refitting, rolling window, and dynamic logistic regression. Covariate adjustment was made in each model using variables included in the logistic European System for Cardiac Operative Risk Evaluation model. The association between in-hospital mortality and some variables changed with time. Notably, the intercept coefficient has been steadily decreasing during the study period, consistent with decreasing observed mortality. Some risk factors, such as operative urgency and postinfarct ventricular septal defect, have been relatively stable over time, whereas other risk factors, such as left ventricular function and surgery on the thoracic aorta, have been associated with lower risk relative to the static model. + +Conclusions— + +Dynamic models or periodic model refitting is necessary to counteract calibration drift. A dynamic modeling framework that uses contemporary and available historic data can provide a continuously smooth update mechanism that also allows for inferences to be made on individual risk factors. 
Better models that withstand the effects of time give advantages for governance, quality improvement, and patient-level decision making.}, + number = {6}, + urldate = {2022-09-08}, + journal = {Circulation: Cardiovascular Quality and Outcomes}, + author = {Hickey, Graeme L. and Grant, Stuart W. and Caiado, Camila and Kendall, Simon and Dunning, Joel and Poullis, Michael and Buchan, Iain and Bridgewater, Ben}, + month = nov, + year = {2013}, + note = {Publisher: American Heart Association}, + keywords = {calibration, Bayesian forecast, clinical governance, logistic model}, + pages = {649--658}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\3233A89N\\Hickey et al. - 2013 - Dynamic Prediction Modeling Approaches for Cardiac.pdf:application/pdf}, +} + +@article{shekar_grid_2019, + title = {Grid {Search}-{Based} {Hyperparameter} {Tuning} and {Classification} of {Microarray} {Cancer} {Data}}, + abstract = {Cancer is a group of diseases caused due to abnormal cell growth. Due to the innovation of microarray technology, a large variety of microarray cancer datasets are produced and hence open up avenues to carry out research work across several disciplines such as Statistics, Computational Biology, Genomic studies and other related fields. The main challenges in analyzing microarray cancer data are the curse of dimensionality, small sample size, noisy data, and imbalance class problem. In this work, we are proposing grid search-based hyperparameter tuning (GSHPT) for random forest parameters to classify Microarray Cancer Data. A grid search is designed by a set of fixed parameter values which are essential in providing optimal accuracy on the basis of n-fold cross-validation. In our work, the 10-fold cross validation is considered. The grid search algorithm provides best parameters such as the number of features to consider at each split, number of trees in the forest, the maximum depth of the tree and the minimum number of samples required to be split at the leaf node. The maximum number of trees considered are 10, 20 and 70 respectively for Ovarian, 3-class Leukemia, and 3-class Leukemia cancer data. In the case of MLL and SRBCT, 50 trees are generated to achieve the maximum classification accuracy. The Gini index is employed as criteria to split the nodes and the maximum depth of the tree is set to 2 for all datasets. Experimental results of the proposed work show an improvement over the state of the art methods. The performance of the proposed method is evaluated using standard metrics such as classification accuracy, precision, recall, f1-score, confusion matrix and misclassification rate and comparative analysis is performed and the results are provided to reveal the performance of the proposed method.}, + language = {en}, + author = {Shekar, B H and Dagnew, Guesh}, + year = {2019}, + pages = {8}, + file = {Shekar et Dagnew - 2019 - Grid Search-Based Hyperparameter Tuning and Classi.pdf:C\:\\Users\\ditdi\\Zotero\\storage\\U3B2CLT8\\Shekar et Dagnew - 2019 - Grid Search-Based Hyperparameter Tuning and Classi.pdf:application/pdf}, +} + +@article{alawad_tuning_nodate, + title = {Tuning {Hyperparameters} of {Decision} {Tree} {Classifiers} {Using} {Computationally} {Efficient} {Schemes}}, + abstract = {Attack types and patterns are constantly evolving which makes frequent detection system updates an urgent need. 
In contrast, the computation cost of developing machine learning-based detection models such as decision tree classifiers is expensive which can be an obstacle to frequently updating detection models. Tuning classifiers’ hyperparameters is a key factor in selecting the best detection model but it significantly increases the computation overhead of the developing procedure. In this research, we have presented a computationally efficient strategy and an algorithm for tuning decision tree classification algorithms’ hyperparameters with less budget and time.}, + language = {en}, + author = {Alawad, Wedad and Zohdy, Mohamed and Debnath, Debatosh}, + pages = {2}, + file = {Alawad et al. - Tuning Hyperparameters of Decision Tree Classifier.pdf:C\:\\Users\\ditdi\\Zotero\\storage\\DK7BCTJP\\Alawad et al. - Tuning Hyperparameters of Decision Tree Classifier.pdf:application/pdf}, +} + +@article{mantovani_hyper-parameter_nodate, + title = {Hyper-{Parameter} {Tuning} of a {Decision} {Tree} {Induction} {Algorithm}}, + abstract = {Supervised classification is the most studied task in Machine Learning. Among the many algorithms used in such task, Decision Tree algorithms are a popular choice, since they are robust and efficient to construct. Moreover, they have the advantage of producing comprehensible models and satisfactory accuracy levels in several application domains. Like most of the Machine Leaning methods, these algorithms have some hyperparameters whose values directly affect the performance of the induced models. Due to the high number of possibilities for these hyper-parameter values, several studies use optimization techniques to find a good set of solutions in order to produce classifiers with good predictive performance. This study investigates how sensitive decision trees are to a hyper-parameter optimization process. Four different tuning techniques were explored to adjust J48 Decision Tree algorithm hyper-parameters. In total, experiments using 102 heterogeneous datasets analyzed the tuning effect on the induced models. The experimental results show that even presenting a low average improvement over all datasets, in most of the cases the improvement is statistically significant.}, + language = {en}, + author = {Mantovani, Rafael G and Horvath, Tomas and Cerri, Ricardo and Vanschoren, Joaquin}, + pages = {6}, + file = {Mantovani et al. - Hyper-Parameter Tuning of a Decision Tree Inductio.pdf:C\:\\Users\\ditdi\\Zotero\\storage\\TBVTXLGQ\\Mantovani et al. - Hyper-Parameter Tuning of a Decision Tree Inductio.pdf:application/pdf}, +} + +@inproceedings{zainab_performance_2020, + address = {Atlanta, GA, USA}, + title = {Performance {Evaluation} of {Tree}-based {Models} for {Big} {Data} {Load} {Forecasting} using {Randomized} {Hyperparameter} {Tuning}}, + isbn = {978-1-72816-251-5}, + url = {https://ieeexplore.ieee.org/document/9378423/}, + doi = {10.1109/BigData50022.2020.9378423}, + abstract = {In this paper machine learning (ML) models have been developed for the application of big data load forecasting using parallel computation. The load forecasting models’ performance is directly linked to system execution capacity, memory, thread count, balancing the load, and available resources. This paper is focused on two main challenges. The first challenge is to reduce the execution time of the ML models and the second one is to choose the suitable tree-based model for effective load forecasting. 
The paper conducts a comprehensive evaluation of the load forecasting using real-world data on energy consumption. Comprehensive results are obtained to show that the performance of random search to tune the ML models exhibits competitive performances whilst not losing the accuracy of the models and gaining a competitive advantage on the run time.}, + language = {en}, + urldate = {2022-09-08}, + booktitle = {2020 {IEEE} {International} {Conference} on {Big} {Data} ({Big} {Data})}, + publisher = {IEEE}, + author = {Zainab, Ameema and Ghrayeb, Ali and Houchati, Mahdi and Refaat, Shady S. and Abu-Rub, Haitham}, + month = dec, + year = {2020}, + pages = {5332--5339}, + file = {Zainab et al. - 2020 - Performance Evaluation of Tree-based Models for Bi.pdf:C\:\\Users\\ditdi\\Zotero\\storage\\V53BDVZJ\\Zainab et al. - 2020 - Performance Evaluation of Tree-based Models for Bi.pdf:application/pdf}, +} + +@article{ellenbach_improved_2021, + title = {Improved {Outcome} {Prediction} {Across} {Data} {Sources} {Through} {Robust} {Parameter} {Tuning}}, + volume = {38}, + issn = {1432-1343}, + url = {https://doi.org/10.1007/s00357-020-09368-z}, + doi = {10.1007/s00357-020-09368-z}, + abstract = {In many application areas, prediction rules trained based on high-dimensional data are subsequently applied to make predictions for observations from other sources, but they do not always perform well in this setting. This is because data sets from different sources can feature (slightly) differing distributions, even if they come from similar populations. In the context of high-dimensional data and beyond, most prediction methods involve one or several tuning parameters. Their values are commonly chosen by maximizing the cross-validated prediction performance on the training data. This procedure, however, implicitly presumes that the data to which the prediction rule will be ultimately applied, follow the same distribution as the training data. If this is not the case, less complex prediction rules that slightly underfit the training data may be preferable. Indeed, a tuning parameter does not only control the degree of adjustment of a prediction rule to the training data, but also, more generally, the degree of adjustment to the distribution of the training data. On the basis of this idea, in this paper we compare various approaches including new procedures for choosing tuning parameter values that lead to better generalizing prediction rules than those obtained based on cross-validation. Most of these approaches use an external validation data set. In our extensive comparison study based on a large collection of 15 transcriptomic data sets, tuning on external data and robust tuning with a tuned robustness parameter are the two approaches leading to better generalizing prediction rules.}, + language = {en}, + number = {2}, + urldate = {2022-09-15}, + journal = {Journal of Classification}, + author = {Ellenbach, Nicole and Boulesteix, Anne-Laure and Bischl, Bernd and Unger, Kristian and Hornung, Roman}, + month = jul, + year = {2021}, + keywords = {Prediction, Batch effects, Robust modeling, Tuning parameter value optimization}, + pages = {212--231}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\PYECAFXJ\\Ellenbach et al. 
- 2021 - Improved Outcome Prediction Across Data Sources Th.pdf:application/pdf}, +} + +@article{breiman_random_2001, + title = {Random {Forests}}, + volume = {45}, + issn = {1573-0565}, + url = {https://doi.org/10.1023/A:1010933404324}, + doi = {10.1023/A:1010933404324}, + abstract = {Random forests are a combination of tree predictors such that each tree depends on the values of a random vector sampled independently and with the same distribution for all trees in the forest. The generalization error for forests converges a.s. to a limit as the number of trees in the forest becomes large. The generalization error of a forest of tree classifiers depends on the strength of the individual trees in the forest and the correlation between them. Using a random selection of features to split each node yields error rates that compare favorably to Adaboost (Y. Freund \& R. Schapire, Machine Learning: Proceedings of the Thirteenth International conference, ***, 148–156), but are more robust with respect to noise. Internal estimates monitor error, strength, and correlation and these are used to show the response to increasing the number of features used in the splitting. Internal estimates are also used to measure variable importance. These ideas are also applicable to regression.}, + language = {en}, + number = {1}, + urldate = {2022-09-15}, + journal = {Machine Learning}, + author = {Breiman, Leo}, + month = oct, + year = {2001}, + keywords = {classification, ensemble, regression}, + pages = {5--32}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\SRFNZUC6\\Breiman - 2001 - Random Forests.pdf:application/pdf}, +} + +@article{probst_hyperparameters_2019, + title = {Hyperparameters and tuning strategies for random forest}, + volume = {9}, + issn = {1942-4795}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/widm.1301}, + doi = {10.1002/widm.1301}, + abstract = {The random forest (RF) algorithm has several hyperparameters that have to be set by the user, for example, the number of observations drawn randomly for each tree and whether they are drawn with or without replacement, the number of variables drawn randomly for each split, the splitting rule, the minimum number of samples that a node must contain, and the number of trees. In this paper, we first provide a literature review on the parameters' influence on the prediction performance and on variable importance measures. It is well known that in most cases RF works reasonably well with the default values of the hyperparameters specified in software packages. Nevertheless, tuning the hyperparameters can improve the performance of RF. In the second part of this paper, after a presenting brief overview of tuning strategies, we demonstrate the application of one of the most established tuning strategies, model-based optimization (MBO). To make it easier to use, we provide the tuneRanger R package that tunes RF with MBO automatically. In a benchmark study on several datasets, we compare the prediction performance and runtime of tuneRanger with other tuning implementations in R and RF with default hyperparameters. This article is categorized under: Algorithmic Development {\textgreater} Biological Data Mining Algorithmic Development {\textgreater} Statistics Algorithmic Development {\textgreater} Hierarchies and Trees Technologies {\textgreater} Machine Learning}, + language = {en}, + number = {3}, + urldate = {2022-09-15}, + journal = {WIREs Data Mining and Knowledge Discovery}, + author = {Probst, Philipp and Wright, Marvin N. 
and Boulesteix, Anne-Laure}, + year = {2019}, + note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/widm.1301}, + keywords = {literature review, ensemble, out-of-bag, performance evaluation, ranger, sequential model-based optimization, tuning parameter}, + pages = {e1301}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\WCA4BGQ2\\Probst et al. - 2019 - Hyperparameters and tuning strategies for random f.pdf:application/pdf;Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\95INJ7C2\\widm.html:text/html}, +} + +@article{probst_tune_nodate, + title = {To {Tune} or {Not} to {Tune} the {Number} of {Trees} in {Random} {Forest}}, + abstract = {The number of trees T in the random forest (RF) algorithm for supervised learning has to be set by the user. It is unclear whether T should simply be set to the largest computationally manageable value or whether a smaller T may be sufficient or in some cases even better. While the principle underlying bagging is that more trees are better, in practice the classification error rate sometimes reaches a minimum before increasing again for increasing number of trees. The goal of this paper is four-fold: (i) providing theoretical results showing that the expected error rate may be a non-monotonous function of the number of trees and explaining under which circumstances this happens; (ii) providing theoretical results showing that such non-monotonous patterns cannot be observed for other performance measures such as the Brier score and the logarithmic loss (for classification) and the mean squared error (for regression); (iii) illustrating the extent of the problem through an application to a large number (n = 306) of datasets from the public database OpenML; (iv) finally arguing in favor of setting T to a computationally feasible large number as long as classical error measures based on average loss are considered.}, + language = {en}, + author = {Probst, Philipp and Boulesteix, Anne-Laure}, + pages = {18}, + file = {Probst et Boulesteix - To Tune or Not to Tune the Number of Trees in Rand.pdf:C\:\\Users\\ditdi\\Zotero\\storage\\WSY3C8VL\\Probst et Boulesteix - To Tune or Not to Tune the Number of Trees in Rand.pdf:application/pdf}, +} + +@misc{brochu_tutorial_2010, + title = {A {Tutorial} on {Bayesian} {Optimization} of {Expensive} {Cost} {Functions}, with {Application} to {Active} {User} {Modeling} and {Hierarchical} {Reinforcement} {Learning}}, + url = {http://arxiv.org/abs/1012.2599}, + doi = {10.48550/arXiv.1012.2599}, + abstract = {We present a tutorial on Bayesian optimization, a method of finding the maximum of expensive cost functions. Bayesian optimization employs the Bayesian technique of setting a prior over the objective function and combining it with evidence to get a posterior function. This permits a utility-based selection of the next observation to make on the objective function, which must take into account both exploration (sampling from areas of high uncertainty) and exploitation (sampling areas likely to offer improvement over the current best observation). We also present two detailed extensions of Bayesian optimization, with experiments---active user modelling with preferences, and hierarchical reinforcement learning---and a discussion of the pros and cons of Bayesian optimization based on our experiences.}, + urldate = {2022-09-23}, + publisher = {arXiv}, + author = {Brochu, Eric and Cora, Vlad M. 
and de Freitas, Nando}, + month = dec, + year = {2010}, + note = {arXiv:1012.2599 [cs]}, + keywords = {Computer Science - Machine Learning, G.1.6, G.3, I.2.6}, + file = {arXiv Fulltext PDF:C\:\\Users\\ditdi\\Zotero\\storage\\8WMDC7C5\\Brochu et al. - 2010 - A Tutorial on Bayesian Optimization of Expensive C.pdf:application/pdf;arXiv.org Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\YVWQY7ZJ\\1012.html:text/html}, +} + +@inproceedings{bergstra_algorithms_2011, + title = {Algorithms for {Hyper}-{Parameter} {Optimization}}, + volume = {24}, + url = {https://proceedings.neurips.cc/paper/2011/hash/86e8f7ab32cfd12577bc2619bc635690-Abstract.html}, + abstract = {Several recent advances to the state of the art in image classification benchmarks have come from better configurations of existing techniques rather than novel approaches to feature learning. Traditionally, hyper-parameter optimization has been the job of humans because they can be very efficient in regimes where only a few trials are possible. Presently, computer clusters and GPU processors make it possible to run more trials and we show that algorithmic approaches can find better results. We present hyper-parameter optimization results on tasks of training neural networks and deep belief networks (DBNs). We optimize hyper-parameters using random search and two new greedy sequential methods based on the expected improvement criterion. Random search has been shown to be sufficiently efficient for learning neural networks for several datasets, but we show it is unreliable for training DBNs. The sequential algorithms are applied to the most difficult DBN learning problems from [Larochelle et al., 2007] and find significantly better results than the best previously reported. This work contributes novel techniques for making response surface models P (y{\textbar}x) in which many elements of hyper-parameter assignment (x) are known to be irrelevant given particular values of other elements.}, + urldate = {2022-09-23}, + booktitle = {Advances in {Neural} {Information} {Processing} {Systems}}, + publisher = {Curran Associates, Inc.}, + author = {Bergstra, James and Bardenet, Rémi and Bengio, Yoshua and Kégl, Balázs}, + year = {2011}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\BF6PB5MW\\Bergstra et al. - 2011 - Algorithms for Hyper-Parameter Optimization.pdf:application/pdf}, +} + +@inproceedings{snoek_practical_2012, + title = {Practical {Bayesian} {Optimization} of {Machine} {Learning} {Algorithms}}, + volume = {25}, + url = {https://www.proceedings.neurips.cc/paper/2012/hash/05311655a15b75fab86956663e1819cd-Abstract.html}, + abstract = {The use of machine learning algorithms frequently involves careful tuning of learning parameters and model hyperparameters. Unfortunately, this tuning is often a “black art” requiring expert experience, rules of thumb, or sometimes brute-force search. There is therefore great appeal for automatic approaches that can optimize the performance of any given learning algorithm to the problem at hand. In this work, we consider this problem through the framework of Bayesian optimization, in which a learning algorithm’s generalization performance is modeled as a sample from a Gaussian process (GP). We show that certain choices for the nature of the GP, such as the type of kernel and the treatment of its hyperparameters, can play a crucial role in obtaining a good optimizer that can achieve expert-level performance. 
We describe new algorithms that take into account the variable cost (duration) of learning algorithm experiments and that can leverage the presence of multiple cores for parallel experimentation. We show that these proposed algorithms improve on previous automatic procedures and can reach or surpass human expert-level optimization for many algorithms including Latent Dirichlet Allocation, Structured SVMs and convolutional neural networks.}, + urldate = {2022-09-23}, + booktitle = {Advances in {Neural} {Information} {Processing} {Systems}}, + publisher = {Curran Associates, Inc.}, + author = {Snoek, Jasper and Larochelle, Hugo and Adams, Ryan P}, + year = {2012}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\55847RAC\\Snoek et al. - 2012 - Practical Bayesian Optimization of Machine Learnin.pdf:application/pdf}, +} + +@inproceedings{hutter_sequential_2011, + address = {Berlin, Heidelberg}, + series = {Lecture {Notes} in {Computer} {Science}}, + title = {Sequential {Model}-{Based} {Optimization} for {General} {Algorithm} {Configuration}}, + isbn = {978-3-642-25566-3}, + doi = {10.1007/978-3-642-25566-3_40}, + abstract = {State-of-the-art algorithms for hard computational problems often expose many parameters that can be modified to improve empirical performance. However, manually exploring the resulting combinatorial space of parameter settings is tedious and tends to lead to unsatisfactory outcomes. Recently, automated approaches for solving this algorithm configuration problem have led to substantial improvements in the state of the art for solving various problems. One promising approach constructs explicit regression models to describe the dependence of target algorithm performance on parameter settings; however, this approach has so far been limited to the optimization of few numerical algorithm parameters on single instances. In this paper, we extend this paradigm for the first time to general algorithm configuration problems, allowing many categorical parameters and optimization for sets of instances. We experimentally validate our new algorithm configuration procedure by optimizing a local search and a tree search solver for the propositional satisfiability problem (SAT), as well as the commercial mixed integer programming (MIP) solver CPLEX. In these experiments, our procedure yielded state-of-the-art performance, and in many cases outperformed the previous best configuration approach.}, + language = {en}, + booktitle = {Learning and {Intelligent} {Optimization}}, + publisher = {Springer}, + author = {Hutter, Frank and Hoos, Holger H. and Leyton-Brown, Kevin}, + editor = {Coello, Carlos A. Coello}, + year = {2011}, + keywords = {Random Forest, General Algorithm, Local Search, Mixed Integer Programming, Numerical Parameter}, + pages = {507--523}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\LIFZYDZ3\\Hutter et al. - 2011 - Sequential Model-Based Optimization for General Al.pdf:application/pdf}, +} + +@inproceedings{bergstra_making_2013, + title = {Making a {Science} of {Model} {Search}: {Hyperparameter} {Optimization} in {Hundreds} of {Dimensions} for {Vision} {Architectures}}, + shorttitle = {Making a {Science} of {Model} {Search}}, + url = {https://proceedings.mlr.press/v28/bergstra13.html}, + abstract = {Many computer vision algorithms depend on configuration settings that are typically hand-tuned in the course of evaluating the algorithm for a particular data set. 
While such parameter tuning is often presented as being incidental to the algorithm, correctly setting these parameter choices is frequently critical to realizing a method’s full potential. Compounding matters, these parameters often must be re-tuned when the algorithm is applied to a new problem domain, and the tuning process itself often depends on personal experience and intuition in ways that are hard to quantify or describe. Since the performance of a given technique depends on both the fundamental quality of the algorithm and the details of its tuning, it is sometimes difficult to know whether a given technique is genuinely better, or simply better tuned. In this work, we propose a meta-modeling approach to support automated hyperparameter optimization, with the goal of providing practical tools that replace hand-tuning with a reproducible and unbiased optimization process. Our approach is to expose the underlying expression graph of how a performance metric (e.g. classification accuracy on validation examples) is computed from hyperparameters that govern not only how individual processing steps are applied, but even which processing steps are included. A hyperparameter optimization algorithm transforms this graph into a program for optimizing that performance metric. Our approach yields state of the art results on three disparate computer vision problems: a face-matching verification task (LFW), a face identification task (PubFig83) and an object recognition task (CIFAR-10), using a single broad class of feed-forward vision architectures.}, + language = {en}, + urldate = {2022-09-24}, + booktitle = {Proceedings of the 30th {International} {Conference} on {Machine} {Learning}}, + publisher = {PMLR}, + author = {Bergstra, James and Yamins, Daniel and Cox, David}, + month = feb, + year = {2013}, + note = {ISSN: 1938-7228}, + pages = {115--123}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\8U3D4349\\Bergstra et al. - 2013 - Making a Science of Model Search Hyperparameter O.pdf:application/pdf}, +} + +@incollection{birattari_f-race_2010, + address = {Berlin, Heidelberg}, + title = {F-{Race} and {Iterated} {F}-{Race}: {An} {Overview}}, + isbn = {978-3-642-02538-9}, + shorttitle = {F-{Race} and {Iterated} {F}-{Race}}, + url = {https://doi.org/10.1007/978-3-642-02538-9_13}, + abstract = {Algorithms for solving hard optimization problems typically have several parameters that need to be set appropriately such that some aspect of performance is optimized. In this chapter, we review F-Race, a racing algorithm for the task of automatic algorithm configuration. F-Race is based on a statistical approach for selecting the best configuration out of a set of candidate configurations under stochastic evaluations. We review the ideas underlying this technique and discuss an extension of the initial F-Race algorithm, which leads to a family of algorithms that we call iterated F-Race. 
Experimental results comparing one specific implementation of iterated F-Race to the original F-Race algorithm confirm the potential of this family of algorithms.}, + language = {en}, + urldate = {2022-09-24}, + booktitle = {Experimental {Methods} for the {Analysis} of {Optimization} {Algorithms}}, + publisher = {Springer}, + author = {Birattari, Mauro and Yuan, Zhi and Balaprakash, Prasanna and Stützle, Thomas}, + editor = {Bartz-Beielstein, Thomas and Chiarandini, Marco and Paquete, Luís and Preuss, Mike}, + year = {2010}, + doi = {10.1007/978-3-642-02538-9_13}, + keywords = {Local Search, Full Factorial Design, Iterate Local Search, Timetabling Problem, Travel Salesman Problem}, + pages = {311--336}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\TKZ845B5\\Birattari et al. - 2010 - F-Race and Iterated F-Race An Overview.pdf:application/pdf}, +} + +@article{yao_general_1991, + title = {General simulated annealing}, + volume = {6}, + issn = {1860-4749}, + url = {https://doi.org/10.1007/BF02948392}, + doi = {10.1007/BF02948392}, + abstract = {Simulated annealing is a new kind of random search methods developed in recent years. It can also be considered as an extension to the classical hill-climbing method in AI—probabilistic hill climbing. One of its most important features is its global convergence. The convergence of simulated annealing algorithm is determined by state generating probability, state accepting probability, and temperature decreasing rate. This paper gives a generalized simulated annealing algorithm with dynamic generating and accepting probabilities. The paper also shows that the generating and accepting probabilities can adopt many different kinds of distributions while the global convergence is guaranteed.}, + language = {en}, + number = {4}, + urldate = {2022-09-25}, + journal = {Journal of Computer Science and Technology}, + author = {Yao, Xin and Li, Guojie}, + month = oct, + year = {1991}, + keywords = {Cauchy Distribution, Global Convergence, Global Optimum State, Markov Chain, Simulated Annealing}, + pages = {329--338}, +} + +@article{connolly_general_1992, + title = {General {Purpose} {Simulated} {Annealing}}, + volume = {43}, + issn = {0160-5682}, + url = {http://www.jstor.org/stable/2583568}, + doi = {10.2307/2583568}, + abstract = {This paper reports on an attempt to write a general purpose simulated annealing algorithm, capable of finding good solutions to problems expressed as pure 0-1 integer linear programs. Computational results are given to support the claim that the resulting program might be a useful addition to the arsenal of techniques for researchers and practitioners wishing to tackle certain types of large scale 0-1 ILPs.}, + number = {5}, + urldate = {2022-09-25}, + journal = {The Journal of the Operational Research Society}, + author = {Connolly, David}, + year = {1992}, + note = {Publisher: Palgrave Macmillan Journals}, + pages = {495--505}, + file = {JSTOR Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\5U6PATDG\\Connolly - 1992 - General Purpose Simulated Annealing.pdf:application/pdf}, +} + +@article{bohachevsky_generalized_1986, + title = {Generalized {Simulated} {Annealing} for {Function} {Optimization}}, + volume = {28}, + issn = {0040-1706}, + url = {http://www.jstor.org/stable/1269076}, + doi = {10.2307/1269076}, + abstract = {A generalized simulated annealing method has been developed and applied to the optimization of functions (possibly constrained) having many local extrema. 
The method is illustrated in some difficult pedagogical examples and used to solve a problem analyzed by Bates (Technometrics, 25, pp. 373-376, 1983), for which we identify an improved optimum. The sensitivity of the solution to changes in the constraints and in other specifications of the problem is analyzed and discussed}, + number = {3}, + urldate = {2022-09-25}, + journal = {Technometrics}, + author = {Bohachevsky, Ihor O. and Johnson, Mark E. and Stein, Myron L.}, + year = {1986}, + note = {Publisher: [Taylor \& Francis, Ltd., American Statistical Association, American Society for Quality]}, + pages = {209--217}, + file = {JSTOR Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\NGSDUCG5\\Bohachevsky et al. - 1986 - Generalized Simulated Annealing for Function Optim.pdf:application/pdf}, +} + +@article{jones_efficient_1998, + title = {Efficient {Global} {Optimization} of {Expensive} {Black}-{Box} {Functions}}, + volume = {13}, + issn = {1573-2916}, + url = {https://doi.org/10.1023/A:1008306431147}, + doi = {10.1023/A:1008306431147}, + abstract = {In many engineering optimization problems, the number of function evaluations is severely limited by time or cost. These problems pose a special challenge to the field of global optimization, since existing methods often require more function evaluations than can be comfortably afforded. One way to address this challenge is to fit response surfaces to data collected by evaluating the objective and constraint functions at a few points. These surfaces can then be used for visualization, tradeoff analysis, and optimization. In this paper, we introduce the reader to a response surface methodology that is especially good at modeling the nonlinear, multimodal functions that often occur in engineering. We then show how these approximating functions can be used to construct an efficient global optimization algorithm with a credible stopping rule. The key to using response surfaces for global optimization lies in balancing the need to exploit the approximating surface (by sampling where it is minimized) with the need to improve the approximation (by sampling where prediction error may be high). Striking this balance requires solving certain auxiliary problems which have previously been considered intractable, but we show how these computational obstacles can be overcome.}, + language = {en}, + number = {4}, + urldate = {2022-09-25}, + journal = {Journal of Global Optimization}, + author = {Jones, Donald R. and Schonlau, Matthias and Welch, William J.}, + month = dec, + year = {1998}, + keywords = {Bayesian global optimization, Kriging, Random function, Response surface, Stochastic process, Visualization}, + pages = {455--492}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\I38JJ5MP\\Jones et al. - 1998 - Efficient Global Optimization of Expensive Black-B.pdf:application/pdf}, +} + +@article{yang_hyperparameter_2020, + title = {On hyperparameter optimization of machine learning algorithms: {Theory} and practice}, + volume = {415}, + issn = {0925-2312}, + shorttitle = {On hyperparameter optimization of machine learning algorithms}, + url = {https://www.sciencedirect.com/science/article/pii/S0925231220311693}, + doi = {10.1016/j.neucom.2020.07.061}, + abstract = {Machine learning algorithms have been used widely in various applications and areas. To fit a machine learning model into different problems, its hyper-parameters must be tuned. 
Selecting the best hyper-parameter configuration for machine learning models has a direct impact on the model’s performance. It often requires deep knowledge of machine learning algorithms and appropriate hyper-parameter optimization techniques. Although several automatic optimization techniques exist, they have different strengths and drawbacks when applied to different types of problems. In this paper, optimizing the hyper-parameters of common machine learning models is studied. We introduce several state-of-the-art optimization techniques and discuss how to apply them to machine learning algorithms. Many available libraries and frameworks developed for hyper-parameter optimization problems are provided, and some open challenges of hyper-parameter optimization research are also discussed in this paper. Moreover, experiments are conducted on benchmark datasets to compare the performance of different optimization methods and provide practical examples of hyper-parameter optimization. This survey paper will help industrial users, data analysts, and researchers to better develop machine learning models by identifying the proper hyper-parameter configurations effectively.}, + language = {en}, + urldate = {2022-09-26}, + journal = {Neurocomputing}, + author = {Yang, Li and Shami, Abdallah}, + month = nov, + year = {2020}, + keywords = {Machine learning, Bayesian optimization, Genetic algorithm, Grid search, Hyper-parameter optimization, Particle swarm optimization}, + pages = {295--316}, + file = {ScienceDirect Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\BKVC3TCK\\S0925231220311693.html:text/html;Version soumise:C\:\\Users\\ditdi\\Zotero\\storage\\4GWECE9J\\Yang et Shami - 2020 - On hyperparameter optimization of machine learning.pdf:application/pdf}, +} + +@article{wessler_tufts_2017, + title = {Tufts {PACE} clinical predictive model registry: update 1990 through 2015}, + volume = {1}, + shorttitle = {Tufts {PACE} clinical predictive model registry}, + number = {1}, + journal = {Diagnostic and prognostic research}, + author = {Wessler, Benjamin S. and Paulus, Jessica and Lundquist, Christine M. and Ajlan, Muhammad and Natto, Zuhair and Janes, William A. and Jethmalani, Nitin and Raman, Gowri and Lutz, Jennifer S. and Kent, David M.}, + year = {2017}, + note = {Publisher: Springer}, + pages = {1--8}, + file = {Full Text:C\:\\Users\\ditdi\\Zotero\\storage\\E59EVQA7\\s41512-017-0021-2.html:text/html}, +} + +@book{mitchell_machine_1997, + title = {Machine learning}, + volume = {1}, + number = {9}, + publisher = {McGraw-hill New York}, + author = {Mitchell, Tom M. and Mitchell, Tom M.}, + year = {1997}, + file = {Full Text:C\:\\Users\\ditdi\\Zotero\\storage\\VI89R6RT\\Mitchell et Mitchell - 1997 - Machine learning.pdf:application/pdf}, +} + +@article{probst_tunability_2019, + title = {Tunability: {Importance} of hyperparameters of machine learning algorithms}, + volume = {20}, + shorttitle = {Tunability}, + number = {1}, + journal = {The Journal of Machine Learning Research}, + author = {Probst, Philipp and Boulesteix, Anne-Laure and Bischl, Bernd}, + year = {2019}, + note = {Publisher: JMLR. org}, + pages = {1934--1965}, + file = {Full Text:C\:\\Users\\ditdi\\Zotero\\storage\\7UNZW9HS\\Probst et al. - 2019 - Tunability Importance of hyperparameters of machi.pdf:application/pdf;Probst et al. - Tunability Importance of Hyperparameters of Machi.pdf:C\:\\Users\\ditdi\\Zotero\\storage\\8ZE4ILXT\\Probst et al. 
- Tunability Importance of Hyperparameters of Machi.pdf:application/pdf}, +} + +@article{riley_calculating_2020, + title = {Calculating the sample size required for developing a clinical prediction model}, + volume = {368}, + copyright = {Published by the BMJ Publishing Group Limited. For permission to use (where not already granted under a licence) please go to http://group.bmj.com/group/rights-licensing/permissions}, + issn = {1756-1833}, + url = {https://www.bmj.com/content/368/bmj.m441}, + doi = {10.1136/bmj.m441}, + abstract = {{\textless}p{\textgreater}Clinical prediction models aim to predict outcomes in individuals, to inform diagnosis or prognosis in healthcare. Hundreds of prediction models are published in the medical literature each year, yet many are developed using a dataset that is too small for the total number of participants or outcome events. This leads to inaccurate predictions and consequently incorrect healthcare decisions for some individuals. In this article, the authors provide guidance on how to calculate the sample size required to develop a clinical prediction model.{\textless}/p{\textgreater}}, + language = {en}, + urldate = {2022-10-07}, + journal = {BMJ}, + author = {Riley, Richard D. and Ensor, Joie and Snell, Kym I. E. and Harrell, Frank E. and Martin, Glen P. and Reitsma, Johannes B. and Moons, Karel G. M. and Collins, Gary and Smeden, Maarten van}, + month = mar, + year = {2020}, + pmid = {32188600}, + note = {Publisher: British Medical Journal Publishing Group +Section: Research Methods \& Reporting}, + pages = {m441}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\22XNPK4A\\Riley et al. - 2020 - Calculating the sample size required for developin.pdf:application/pdf;Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\3ZY26DGZ\\bmj.m441.html:text/html}, +} + +@misc{strubell_energy_2019, + title = {Energy and {Policy} {Considerations} for {Deep} {Learning} in {NLP}}, + url = {http://arxiv.org/abs/1906.02243}, + doi = {10.48550/arXiv.1906.02243}, + abstract = {Recent progress in hardware and methodology for training neural networks has ushered in a new generation of large networks trained on abundant data. These models have obtained notable gains in accuracy across many NLP tasks. However, these accuracy improvements depend on the availability of exceptionally large computational resources that necessitate similarly substantial energy consumption. As a result these models are costly to train and develop, both financially, due to the cost of hardware and electricity or cloud compute time, and environmentally, due to the carbon footprint required to fuel modern tensor processing hardware. In this paper we bring this issue to the attention of NLP researchers by quantifying the approximate financial and environmental costs of training a variety of recently successful neural network models for NLP. Based on these findings, we propose actionable recommendations to reduce costs and improve equity in NLP research and practice.}, + urldate = {2022-10-07}, + publisher = {arXiv}, + author = {Strubell, Emma and Ganesh, Ananya and McCallum, Andrew}, + month = jun, + year = {2019}, + note = {arXiv:1906.02243 [cs]}, + keywords = {Computer Science - Computation and Language}, + file = {arXiv Fulltext PDF:C\:\\Users\\ditdi\\Zotero\\storage\\YDG72EZN\\Strubell et al. 
- 2019 - Energy and Policy Considerations for Deep Learning.pdf:application/pdf;arXiv.org Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\IC9XHB8M\\1906.html:text/html}, +} + +@article{uddin_comparing_2019, + title = {Comparing different supervised machine learning algorithms for disease prediction}, + volume = {19}, + issn = {1472-6947}, + url = {https://doi.org/10.1186/s12911-019-1004-8}, + doi = {10.1186/s12911-019-1004-8}, + abstract = {Supervised machine learning algorithms have been a dominant method in the data mining field. Disease prediction using health data has recently shown a potential application area for these methods. This study aims to identify the key trends among different types of supervised machine learning algorithms, and their performance and usage for disease risk prediction.}, + language = {en}, + number = {1}, + urldate = {2022-10-11}, + journal = {BMC Medical Informatics and Decision Making}, + author = {Uddin, Shahadat and Khan, Arif and Hossain, Md Ekramul and Moni, Mohammad Ali}, + month = dec, + year = {2019}, + keywords = {Machine learning, Disease prediction, Medical data, Supervised machine learning algorithm}, + pages = {281}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\JBZ6TPPE\\Uddin et al. - 2019 - Comparing different supervised machine learning al.pdf:application/pdf}, +} + +@article{vries_propensity_2018, + title = {Propensity {Score} {Estimation} {Using} {Classification} and {Regression} {Trees} in the {Presence} of {Missing} {Covariate} {Data}}, + volume = {7}, + issn = {2161-962X}, + url = {https://www.degruyter.com/document/doi/10.1515/em-2017-0020/html?casa_token=cjo6UkSu_a4AAAAA%3Aj6T3fMzn9NklGjb12hAa4NPkpCKhGX6gVgN_zL2-Pzi1HbNlqmwz9ulesKSQfcG-FILUVdT2D3vq}, + doi = {10.1515/em-2017-0020}, + abstract = {Data mining and machine learning techniques such as classification and regression trees (CART) represent a promising alternative to conventional logistic regression for propensity score estimation. Whereas incomplete data preclude the fitting of a logistic regression on all subjects, CART is appealing in part because some implementations allow for incomplete records to be incorporated in the tree fitting and provide propensity score estimates for all subjects. Based on theoretical considerations, we argue that the automatic handling of missing data by CART may however not be appropriate. Using a series of simulation experiments, we examined the performance of different approaches to handling missing covariate data; (i) applying the CART algorithm directly to the (partially) incomplete data, (ii) complete case analysis, and (iii) multiple imputation. Performance was assessed in terms of bias in estimating exposure-outcome effects among the exposed, standard error, mean squared error and coverage. Applying the CART algorithm directly to incomplete data resulted in bias, even in scenarios where data were missing completely at random. Overall, multiple imputation followed by CART resulted in the best performance. Our study showed that automatic handling of missing data in CART can cause serious bias and does not outperform multiple imputation as a means to account for missing data.}, + language = {en}, + number = {1}, + urldate = {2022-10-24}, + journal = {Epidemiologic Methods}, + author = {Vries, Bas B. L. Penning de and Smeden, Maarten van and Groenwold, Rolf H. 
H.}, + month = dec, + year = {2018}, + note = {Publisher: De Gruyter}, + keywords = {missing data, propensity score, CART, causal inference, multiple imputation}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\ACLQGZSZ\\Vries et al. - 2018 - Propensity Score Estimation Using Classification a.pdf:application/pdf}, +} + +@article{hafermann_using_2022, + title = {Using {Background} {Knowledge} from {Preceding} {Studies} for {Building} a {Random} {Forest} {Prediction} {Model}: {A} {Plasmode} {Simulation} {Study}}, + volume = {24}, + copyright = {http://creativecommons.org/licenses/by/3.0/}, + issn = {1099-4300}, + shorttitle = {Using {Background} {Knowledge} from {Preceding} {Studies} for {Building} a {Random} {Forest} {Prediction} {Model}}, + url = {https://www.mdpi.com/1099-4300/24/6/847}, + doi = {10.3390/e24060847}, + abstract = {There is an increasing interest in machine learning (ML) algorithms for predicting patient outcomes, as these methods are designed to automatically discover complex data patterns. For example, the random forest (RF) algorithm is designed to identify relevant predictor variables out of a large set of candidates. In addition, researchers may also use external information for variable selection to improve model interpretability and variable selection accuracy, thereby prediction quality. However, it is unclear to which extent, if at all, RF and ML methods may benefit from external information. In this paper, we examine the usefulness of external information from prior variable selection studies that used traditional statistical modeling approaches such as the Lasso, or suboptimal methods such as univariate selection. We conducted a plasmode simulation study based on subsampling a data set from a pharmacoepidemiologic study with nearly 200,000 individuals, two binary outcomes and 1152 candidate predictor (mainly sparse binary) variables. When the scope of candidate predictors was reduced based on external knowledge RF models achieved better calibration, that is, better agreement of predictions and observed outcome rates. However, prediction quality measured by cross-entropy, AUROC or the Brier score did not improve. We recommend appraising the methodological quality of studies that serve as an external information source for future prediction model development.}, + language = {en}, + number = {6}, + urldate = {2022-10-24}, + journal = {Entropy}, + author = {Hafermann, Lorena and Klein, Nadja and Rauch, Geraldine and Kammer, Michael and Heinze, Georg}, + month = jun, + year = {2022}, + note = {Number: 6 +Publisher: Multidisciplinary Digital Publishing Institute}, + keywords = {machine learning, calibration, sparsity, variable selection}, + pages = {847}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\55ZSZUX5\\Hafermann et al. - 2022 - Using Background Knowledge from Preceding Studies .pdf:application/pdf;Snapshot:C\:\\Users\\ditdi\\Zotero\\storage\\NXSWTBXU\\847.html:text/html}, +} + +@article{jaeger_oblique_2019, + title = {Oblique random survival forests}, + volume = {13}, + issn = {1932-6157}, + url = {https://projecteuclid.org/journals/annals-of-applied-statistics/volume-13/issue-3/Oblique-random-survival-forests/10.1214/19-AOAS1261.full}, + doi = {10.1214/19-AOAS1261}, + language = {en}, + number = {3}, + urldate = {2022-10-24}, + journal = {The Annals of Applied Statistics}, + author = {Jaeger, Byron C. and Long, D. Leann and Long, Dustin M. and Sims, Mario and Szychowski, Jeff M. and Min, Yuan-I and Mcclure, Leslie A. 
and Howard, George and Simon, Noah}, + month = sep, + year = {2019}, + file = {Jaeger et al. - 2019 - Oblique random survival forests.pdf:C\:\\Users\\ditdi\\Zotero\\storage\\YFAYH3NV\\Jaeger et al. - 2019 - Oblique random survival forests.pdf:application/pdf}, +} + +@article{wang_simulation-based_2022, + title = {A simulation-based tree method for building linear models with interactions}, + volume = {51}, + issn = {0361-0926}, + url = {https://doi.org/10.1080/03610926.2020.1749665}, + doi = {10.1080/03610926.2020.1749665}, + abstract = {Linear models are the most common predictive models for a continuous, discrete or categorical response and often include interaction terms, but for more than a few predictors interactions tend to be neglected because they add too many terms to the model. In this paper, we propose a simulation-based tree method to detect the interactions, which contributes to the predictions. In the method, we first bootstrap the observations and randomly choose a number of variables to build trees. The interactions between the roots and the corresponding leaves are collected. The times of each interaction that appear are counted. To obtain the benchmark of the number of each interaction that appears in the trees, the response values are substituted by randomly generated values and then we repeat the procedure. The interactions with occurrence frequency more than the benchmark are put into the regression models. Finally, we select variables by running LASSO for the model with main effects and the interactions obtained. In the experiments, our method shows good performances, especially for the data set with many interactions.}, + number = {2}, + urldate = {2022-10-24}, + journal = {Communications in Statistics - Theory and Methods}, + author = {Wang, Jin and Cabrera, Javier and Tsui, Kwok Leung}, + month = jan, + year = {2022}, + note = {Publisher: Taylor \& Francis +\_eprint: https://doi.org/10.1080/03610926.2020.1749665}, + keywords = {Simulation, prediction, regression, interaction, tree}, + pages = {404--413}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\RQCG5FHW\\Wang et al. - 2022 - A simulation-based tree method for building linear.pdf:application/pdf}, +} + +@misc{benvancalster_benvancalsterclassimb_calibration_2022, + title = {benvancalster/classimb\_calibration}, + url = {https://github.com/benvancalster/classimb_calibration/blob/ad521b46b32ec42689a05bc336fb3270a5c1f28e/simulation%20study/Simulation/performance_measures_wo_eci.R}, + urldate = {2022-11-29}, + author = {benvancalster}, + month = nov, + year = {2022}, + note = {original-date: 2022-02-14T15:52:06Z}, +} + +@article{van_calster_regression_2020, + title = {Regression shrinkage methods for clinical prediction models do not guarantee improved performance: {Simulation} study}, + volume = {29}, + issn = {0962-2802}, + shorttitle = {Regression shrinkage methods for clinical prediction models do not guarantee improved performance}, + url = {https://doi.org/10.1177/0962280220921415}, + doi = {10.1177/0962280220921415}, + abstract = {When developing risk prediction models on datasets with limited sample size, shrinkage methods are recommended. Earlier studies showed that shrinkage results in better predictive performance on average. This simulation study aimed to investigate the variability of regression shrinkage on predictive performance for a binary outcome. 
We compared standard maximum likelihood with the following shrinkage methods: uniform shrinkage (likelihood-based and bootstrap-based), penalized maximum likelihood (ridge) methods, LASSO logistic regression, adaptive LASSO, and Firth?s correction. In the simulation study, we varied the number of predictors and their strength, the correlation between predictors, the event rate of the outcome, and the events per variable. In terms of results, we focused on the calibration slope. The slope indicates whether risk predictions are too extreme (slope??1). The results can be summarized into three main findings. First, shrinkage improved calibration slopes on average. Second, the between-sample variability of calibration slopes was often increased relative to maximum likelihood. In contrast to other shrinkage approaches, Firth?s correction had a small shrinkage effect but showed low variability. Third, the correlation between the estimated shrinkage and the optimal shrinkage to remove overfitting was typically negative, with Firth?s correction as the exception. We conclude that, despite improved performance on average, shrinkage often worked poorly in individual datasets, in particular when it was most needed. The results imply that shrinkage methods do not solve problems associated with small sample size or low number of events per variable.}, + language = {en}, + number = {11}, + urldate = {2022-11-29}, + journal = {Statistical Methods in Medical Research}, + author = {Van Calster, Ben and van Smeden, Maarten and De Cock, Bavo and Steyerberg, Ewout W}, + month = nov, + year = {2020}, + note = {Publisher: SAGE Publications Ltd STM}, + pages = {3166--3178}, + file = {SAGE PDF Full Text:C\:\\Users\\ditdi\\Zotero\\storage\\YYECQNI2\\Van Calster et al. - 2020 - Regression shrinkage methods for clinical predicti.pdf:application/pdf}, +} + +@article{luijken_impact_2019, + title = {Impact of predictor measurement heterogeneity across settings on the performance of prediction models: {A} measurement error perspective}, + volume = {38}, + issn = {1097-0258}, + shorttitle = {Impact of predictor measurement heterogeneity across settings on the performance of prediction models}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/sim.8183}, + doi = {10.1002/sim.8183}, + abstract = {It is widely acknowledged that the predictive performance of clinical prediction models should be studied in patients that were not part of the data in which the model was derived. Out-of-sample performance can be hampered when predictors are measured differently at derivation and external validation. This may occur, for instance, when predictors are measured using different measurement protocols or when tests are produced by different manufacturers. Although such heterogeneity in predictor measurement between derivation and validation data is common, the impact on the out-of-sample performance is not well studied. Using analytical and simulation approaches, we examined out-of-sample performance of prediction models under various scenarios of heterogeneous predictor measurement. These scenarios were defined and clarified using an established taxonomy of measurement error models. The results of our simulations indicate that predictor measurement heterogeneity can induce miscalibration of prediction and affects discrimination and overall predictive accuracy, to extents that the prediction model may no longer be considered clinically useful. 
The measurement error taxonomy was found to be helpful in identifying and predicting effects of heterogeneous predictor measurements between settings of prediction model derivation and validation. Our work indicates that homogeneity of measurement strategies across settings is of paramount importance in prediction research.}, + language = {en}, + number = {18}, + urldate = {2022-11-29}, + journal = {Statistics in Medicine}, + author = {Luijken, K. and Groenwold, R. H. H. and Van Calster, B. and Steyerberg, E. W. and van Smeden, M.}, + year = {2019}, + note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/sim.8183}, + keywords = {Brier score, calibration, discrimination, external validation, measurement error, measurement heterogeneity, prediction model}, + pages = {3444--3459}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\CEFYMMMH\\Luijken et al. - 2019 - Impact of predictor measurement heterogeneity acro.pdf:application/pdf}, +} + +@article{luijken_impact_2019-1, + title = {Impact of predictor measurement heterogeneity across settings on the performance of prediction models: {A} measurement error perspective}, + volume = {38}, + issn = {1097-0258}, + shorttitle = {Impact of predictor measurement heterogeneity across settings on the performance of prediction models}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/sim.8183}, + doi = {10.1002/sim.8183}, + abstract = {It is widely acknowledged that the predictive performance of clinical prediction models should be studied in patients that were not part of the data in which the model was derived. Out-of-sample performance can be hampered when predictors are measured differently at derivation and external validation. This may occur, for instance, when predictors are measured using different measurement protocols or when tests are produced by different manufacturers. Although such heterogeneity in predictor measurement between derivation and validation data is common, the impact on the out-of-sample performance is not well studied. Using analytical and simulation approaches, we examined out-of-sample performance of prediction models under various scenarios of heterogeneous predictor measurement. These scenarios were defined and clarified using an established taxonomy of measurement error models. The results of our simulations indicate that predictor measurement heterogeneity can induce miscalibration of prediction and affects discrimination and overall predictive accuracy, to extents that the prediction model may no longer be considered clinically useful. The measurement error taxonomy was found to be helpful in identifying and predicting effects of heterogeneous predictor measurements between settings of prediction model derivation and validation. Our work indicates that homogeneity of measurement strategies across settings is of paramount importance in prediction research.}, + language = {en}, + number = {18}, + urldate = {2022-11-29}, + journal = {Statistics in Medicine}, + author = {Luijken, K. and Groenwold, R. H. H. and Van Calster, B. and Steyerberg, E. W. and van Smeden, M.}, + year = {2019}, + note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/sim.8183}, + keywords = {Brier score, calibration, discrimination, external validation, measurement error, measurement heterogeneity, prediction model}, + pages = {3444--3459}, + file = {Full Text PDF:C\:\\Users\\ditdi\\Zotero\\storage\\PN84LHP3\\Luijken et al. 
- 2019 - Impact of predictor measurement heterogeneity acro.pdf:application/pdf}, +} diff --git a/Practicals/JudithNeve/Week5/Week5.Rmd b/Practicals/JudithNeve/Week5/Week5.Rmd new file mode 100644 index 0000000..846ae4f --- /dev/null +++ b/Practicals/JudithNeve/Week5/Week5.Rmd @@ -0,0 +1,156 @@ +--- +title: "Tuning random forest hyperparameters" +author: "Judith Neve" +date: '2022-12-14' +output: + ioslides_presentation: + logo: logo.png + widescreen: true +bibliography: Thesis.bib +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = FALSE) +library(ggplot2) +library(tidyverse) +``` + +# Introduction + +## Random forests + +*Main idea*: combine many decision trees to classify an observation + +At each split, only a fraction of the predictors are sampled to consider a split. + +## Hyperparameters + +- Number of trees +- Number of candidate predictors +- Proportion of the sample used for fitting the tree +- Sample with or without replacement +- Minimum node size +- Split rule + +## Tuning + +Trying multiple options to find the best values! + +## Previous research + +Default values can lead to good *accuracy*, but *discrimination* and *calibration* are typically bad. + +## Current research + +How can we tune hyperparameters to improve overall model performance, taking computational intensivity into account? + +Three studies: + +- *Study 1: which hyperparameters to tune?* +- Study 2: what metric to optimise? +- Study 3: which hyperparameter search strategy to use? + + +# Methods + +## Data simulation + +Vary data characteristics: + +- Number of predictors: 8, 16, 32 +- Event fraction: 0.1, 0.3, 0.5 +- Sample size: 0.5$n$, $n$, 2$n$ ($n$ the required sample size to detect predictor effects for an AUC of 0.8; @riley_calculating_2020) + +Simulated using **logistic regression with strong interactions**: $$\begin{align} P(y_i = 1) & = \frac{exp(\mathbf\beta\mathbf{X})}{1 + exp(\mathbf\beta\mathbf{X})} \\& = \frac{exp(\beta_0 + \beta*\sum_{j=1}^px_{ij} + \gamma*\sum_{j=1}^{0.25*p}x_{ij}*x_{i(j+0.5p)})}{1 + exp(\beta_0 + \beta*\sum_{j=1}^px_{ij} + \gamma*\sum_{j=1}^{0.25*p}x_{ij}*x_{i(j+0.5p)})} \end{align}$$ + +## Data simulation + +```{r, cache =TRUE, echo=FALSE} +load("tuneonce.RData") +all_perf_dataset1 <- all_perf +dat1 <- dat +val_dataset1 <- val_dataset +load("tuneonce2.RData") +all_perf_dataset2 <- all_perf +dat2 <- dat +val_dataset2 <- val_dataset + +rm(list=ls()[!ls() %in% c("all_perf_dataset1", "all_perf_dataset2", "dat1", "dat2", "val_dataset1", "val_dataset2", "betas_matrix")]) + +all_perf_dataset1 <- all_perf_dataset1 %>% + mutate(dataset_id = 1) +all_perf_dataset2 <- all_perf_dataset2 %>% + mutate(dataset_id = 2) +all_perf <- rbind(all_perf_dataset1, all_perf_dataset2) +``` + +```{r, echo = FALSE, message=FALSE, warning=FALSE, fig.align='center'} +dat1 %>% + as.data.frame() %>% + mutate(Y = as.factor(Y), + Pred_value = as.numeric(Pred_value)) %>% + pivot_wider(names_from = "Pred_number", values_from = "Pred_value") %>% + mutate(Prob = exp(betas_matrix[1,3] + betas_matrix[1,4]*sum(X1:X8) + betas_matrix[1,5]*(X1*X5 + X2*X6))/(1+exp(betas_matrix[1,3] + betas_matrix[1,4]*sum(X1:X8) + betas_matrix[1,5]*(X1*X5 + X2*X6)))) %>% + ggplot(aes(x = Prob, col = Y, fill = Y)) + + geom_density(alpha = 0.2) + + theme_minimal() +``` + + +## Manipulations + +Vary hyperparameters that are tuned when fitting a random forest. + +
+ Always tuned: + + - Number of predictors + - Minimum node size + + All combinations of: + + - Number of trees + - Sample fraction + - Sampling with replacement + - Split rule +
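+
+## Illustration: simulating one data set
+
+The chunk below is a minimal sketch (not the study code) of the data-generating mechanism shown earlier, for $p = 8$ standard-normal predictors; `n`, `beta0`, `beta`, and `gamma` are placeholder values chosen only for illustration.
+
+```{r, eval=FALSE, echo=TRUE}
+set.seed(1)
+n <- 1000; p <- 8                       # hypothetical sample size and number of predictors
+beta0 <- -1; beta <- 0.3; gamma <- 0.6  # hypothetical coefficient values
+
+X <- matrix(rnorm(n * p), n, p, dimnames = list(NULL, paste0("X", 1:p)))
+# Linear predictor: intercept + main effects + interactions x_j * x_{j + p/2} for j = 1, ..., p/4
+lin_pred <- beta0 +
+  beta  * rowSums(X) +
+  gamma * rowSums(X[, 1:(p/4), drop = FALSE] * X[, (p/2 + 1):(p/2 + p/4), drop = FALSE])
+train_dat <- data.frame(Y = factor(rbinom(n, 1, plogis(lin_pred))), X)
+```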
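+
+## Illustration: tuning one configuration
+
+As a concrete example of the manipulation above, this sketch tunes the two always-tuned hyperparameters with a small grid search using the `ranger` package. The training set `train_dat` (as sketched on the previous slide), a validation set `val_dat` with the same structure, the candidate values, and the Brier score as selection criterion are all assumptions for this illustration, not the settings used in the study.
+
+```{r, eval=FALSE, echo=TRUE}
+library(ranger)
+
+# Candidate values for the two hyperparameters that are always tuned
+tune_grid <- expand.grid(
+  mtry          = c(2, 4, 8),
+  min.node.size = c(1, 5, 10)
+)
+
+tune_grid$brier <- NA_real_
+for (i in seq_len(nrow(tune_grid))) {
+  rf <- ranger(
+    Y ~ ., data = train_dat,
+    probability   = TRUE,                     # probability forest: predicted risks rather than class labels
+    mtry          = tune_grid$mtry[i],
+    min.node.size = tune_grid$min.node.size[i],
+    num.trees     = 500
+  )
+  p_hat <- predict(rf, data = val_dat)$predictions[, "1"]
+  tune_grid$brier[i] <- mean((p_hat - as.numeric(val_dat$Y == "1"))^2)  # Brier score on the validation set
+}
+
+tune_grid[which.min(tune_grid$brier), ]   # best of the nine candidate settings
+```
+
+For the remaining four hyperparameters, the `hyperparameter_combinations` grid on the next slide records, per condition, whether each of them is tuned as well (`TRUE`) or not (`FALSE`).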
+ +## Settings + +```{r, eval=FALSE, echo = TRUE} +hyperparameter_combinations <- expand.grid( + mtry = TRUE, + sample.fraction = c(TRUE, FALSE), + num.trees = c(TRUE, FALSE), + replace = c(TRUE, FALSE), + min.node.size = TRUE, + splitrule = c(TRUE, FALSE) +) +hyperparameter_combinations <- rbind(rep(FALSE, ncol(hyperparameter_combinations)), + hyperparameter_combinations) +``` + + +# Results + +## Mean performances + +```{r, echo = FALSE, warning = FALSE, message = FALSE} +require(DT) + +datatable( + all_perf %>% + pivot_longer(Runtime:CohensKappa, names_to = "Metric", values_to = "Performance") %>% + group_by(Metric, `Tuned hyperparameters`) %>% + summarise(`Performance mean` = mean(Performance), + `Performance SD` = sd(Performance)), + options = list(pageLength = 5) +) +``` + +## Discussion + +![](https://media.giphy.com/media/WHLf9qDDS97EY/giphy.gif) + +## References + diff --git a/Practicals/JudithNeve/Week5/Week5.html b/Practicals/JudithNeve/Week5/Week5.html new file mode 100644 index 0000000..e37a357 --- /dev/null +++ b/Practicals/JudithNeve/Week5/Week5.html @@ -0,0 +1,6003 @@ + + + + Tuning random forest hyperparameters + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/Practicals/JudithNeve/Week5/Week5_files/figure-html/unnamed-chunk-2-1.png b/Practicals/JudithNeve/Week5/Week5_files/figure-html/unnamed-chunk-2-1.png new file mode 100644 index 0000000..293eafe Binary files /dev/null and b/Practicals/JudithNeve/Week5/Week5_files/figure-html/unnamed-chunk-2-1.png differ
diff --git a/Practicals/JudithNeve/Week5/logo.png b/Practicals/JudithNeve/Week5/logo.png new file mode 100644 index 0000000..22b4dd5 Binary files /dev/null and b/Practicals/JudithNeve/Week5/logo.png differ
diff --git a/Practicals/JudithNeve/Week5/tuneonce.RData b/Practicals/JudithNeve/Week5/tuneonce.RData new file mode 100644 index 0000000..af51bf7 Binary files /dev/null and b/Practicals/JudithNeve/Week5/tuneonce.RData differ
diff --git a/Practicals/JudithNeve/Week5/tuneonce2.RData b/Practicals/JudithNeve/Week5/tuneonce2.RData new file mode 100644 index 0000000..6827e2c Binary files /dev/null and b/Practicals/JudithNeve/Week5/tuneonce2.RData differ