|
| 1 | + |
| 2 | +### Atomic data types |
| 3 | + |
| 4 | +# Numbers |
| 5 | +x <- 1 |
| 6 | +# arithmetic |
| 7 | +x + 1 # addition |
| 8 | +x - 1 # subtraction |
| 9 | +x * 2 # multiplication |
| 10 | +x / 2 # division |
| 11 | +x ** 2 # exponentiation |
| 12 | +1 / (2 - 0.5) # BEDMAS rules |
| 13 | + |
| 14 | +# Text (strings) |
| 15 | +greeting <- "hello" |
| 16 | +greeting <- 'hello' |
| 17 | +sentence <- 'He says "hello"' |
| 18 | +sentence <- "I'm busy now" |
| 19 | + |
| 20 | +# Booleans (true/false values) |
| 21 | +bool1 <- FALSE |
| 22 | +bool2 <- TRUE |
| 23 | +# Short forms (careful: T and F are ordinary variables that can be overwritten, unlike TRUE and FALSE) |
| 24 | +bool1 <- T |
| 25 | +bool2 <- F |
| 26 | + |
| 27 | +# Booleans as a result of comparisons |
| 28 | +x <- 2 |
| 29 | +y <- 3 |
| 30 | +x < y |
| 31 | +x <= y |
| 32 | +x > y |
| 33 | +x >= y |
| 34 | +x == y |
| 35 | +x != y |
| 36 | + |
| 37 | +# Boolean operators |
| 38 | +x <- TRUE |
| 39 | +y <- FALSE |
| 40 | +x & y # and |
| 41 | +x | y # or |
| 42 | +!y # not |
| 43 | + |
| 44 | +# Order of operations with Booleans |
| 45 | + |
| 46 | +# Suppose we want to know if we can publish our results. We can publish if the |
| 47 | +# results are significant or the study was preregistered, and if we have the |
| 48 | +# funds to publish open-access or the journal will waive the open-access fee. |
| 49 | +# Suppose we have the funds to publish open-access even though the journal won't |
| 50 | +# waive the fee, but our results were not significant and the study was not |
| 51 | +# preregistered. Let's create some variables to represent the situation: |
| 52 | +funds <- TRUE # We have the funds |
| 53 | +waive <- FALSE # The journal won't waive the fee |
| 54 | +signif <- FALSE # Our results are not significant |
| 55 | +prereg <- FALSE # Our study was not preregistered |
| 56 | +# So, can we publish? |
| 57 | +can_publish <- funds | waive & signif | prereg |
| 58 | +print(can_publish) |
| 59 | +# Surprisingly, the above expression says we can! Why is this? It's because the |
| 60 | +# & operator takes precedence over the | operators, so it's evaluated first. The |
| 61 | +# expression then reduces to TRUE | FALSE | FALSE, which evaluates to TRUE |
| 62 | +# (regardless of which | operator is applied first). To ensure that the | |
| 63 | +# operators are applied first, we should use brackets: |
| 64 | +can_publish <- (funds | waive) & (signif | prereg) |
| 65 | +print(can_publish) |
| 66 | +# Now the result makes sense---we can't publish, but at least we know about |
| 67 | +# Boolean order of operations! |
| 68 | + |
| 69 | +### Collections |
| 70 | + |
| 71 | +# vectors |
| 72 | + |
| 73 | +x <- c(1, 2, 3, 4, 5) |
| 74 | +y <- c(6, 7, 8) |
| 75 | +z <- c(x, y) |
| 76 | + |
| 77 | +a <- c() |
| 78 | +b <- c(1, 2, 3) |
| 79 | +d <- c(a, b) |
| 80 | + |
| 81 | +a <- c(1, 2, 3, 'a', 'b', 'c') # Data types in vectors must all be the same |
| 82 | +print(a) # Note that numbers are converted to text |
| 83 | + |
| 84 | +# Storing values by name |
| 85 | + |
| 86 | +a <- c(first = 1, second = 2, third = 3, fourth = 4) |
| 87 | +print(a) |
| 88 | + |
| 89 | +# Accessing values in vectors |
| 90 | + |
| 91 | +a[3] # Single items |
| 92 | +a[2:4] # Ranges of items |
| 93 | +a[c(1, 3, 4)] # Specific items |
| 94 | +a[-2] # Excluding single items |
| 95 | +a[-2:-4] # Excluding ranges of items |
| 96 | +a[c(-1, -3, -4)] # Excluding specific items |
| 97 | +a['first'] # Accessing by name |
| 98 | +a[c('first', 'second')] # Accessing multiple by name |
| 99 | + |
| 100 | +# Storing mixed data types in lists |
| 101 | + |
| 102 | +a <- list(1, 2, 3, 'a', 'b', 'c') |
| 103 | +print(a) |
| 104 | + |
| 105 | +# Accessing actual items vs sub-lists |
| 106 | + |
| 107 | +a[[1]] # Item within list |
| 108 | +a[1] # Subsection of list |
| 109 | + |
| 110 | +# What's the difference? Arithmetic works on the item itself ([[ ]]) but raises an error on a sub-list ([ ]) |
| 111 | + |
| 112 | +a[[1]] + 1 |
| 113 | +a[1] + 1 |
| 114 | + |
| 115 | +# Storing items by name |
| 116 | +a <- list(first = 1, second = 2, third = 3) |
| 117 | +print(a) |
| 118 | +a['first'] |
| 119 | +a[['first']] |
| 120 | +a$first |
| 121 | + |
| 122 | +# Selecting multiple items |
| 123 | +a[[c('first', 'second')]] # Error! |
| 124 | +unlist(a[c('first', 'second')]) |
| 125 | + |
| 126 | +### Conditionals |
| 127 | + |
| 128 | +# p values example: |
| 129 | + |
| 130 | +p <- 0.009 |
| 131 | + |
| 132 | +if (p < 0.001) { |
| 133 | + print('Our results are very significant!') |
| 134 | +} else if (p < 0.01) { |
| 135 | + print('Our results are fairly significant') |
| 136 | +} else if (p < 0.05) { |
| 137 | + print('Our results are technically significant') |
| 138 | +} else { |
| 139 | + print('Our results are not significant') |
| 140 | +} |
| 141 | + |
| 142 | +if (p < 0.001) { |
| 143 | + print('Our results are very significant!') |
| 144 | +} |
| 145 | +if (p < 0.01) { |
| 146 | + print('Our results are fairly significant') |
| 147 | +} |
| 148 | + |
| 149 | +### Loops |
| 150 | + |
| 151 | +# Simulate example data |
| 152 | +n_data <- 100 |
| 153 | +x <- rnorm(n_data) |
| 154 | +y <- x + rnorm(n_data) |
| 155 | +plot(y ~ x) |
| 156 | + |
| 157 | +# Compute a single bootstrap correlation |
| 158 | +n_sample <- 50 |
| 159 | +sample_idx <- sample(1:n_data, n_sample, replace = TRUE) |
| 160 | +sampled_x <- x[sample_idx] |
| 161 | +sampled_y <- y[sample_idx] |
| 162 | +cor(sampled_x, sampled_y) |
| 163 | + |
# Do bootstrapped correlations in a loop
n_iters <- 200
iter_count <- 0
# Preallocate one slot per iteration instead of growing cors with c(), which
# copies the whole vector on every pass.
cors <- numeric(n_iters)
repeat {
  # Keep count (advanced first so it doubles as the index of the slot to fill)
  iter_count <- iter_count + 1
  # Compute bootstrap correlation
  sample_idx <- sample(1:n_data, n_sample, replace = TRUE)
  sampled_x <- x[sample_idx]
  sampled_y <- y[sample_idx]
  curr_cor <- cor(sampled_x, sampled_y)
  # Accumulate
  cors[iter_count] <- curr_cor
  # Use >= rather than == so the loop still terminates if n_iters is below 1
  if (iter_count >= n_iters) {
    break
  }
}
| 182 | + |
# More succinctly using a while loop:
iter_count <- 0
# Preallocate one slot per iteration instead of growing cors with c().
cors <- numeric(n_iters)
while (iter_count < n_iters) {
  # Advance the counter first so it doubles as the index of the slot to fill.
  iter_count <- iter_count + 1
  sample_idx <- sample(1:n_data, n_sample, replace = TRUE)
  sampled_x <- x[sample_idx]
  sampled_y <- y[sample_idx]
  curr_cor <- cor(sampled_x, sampled_y)
  cors[iter_count] <- curr_cor
}
| 194 | + |
# Yet more succinctly using a for loop
# Preallocate as doubles (NA_real_) so filling it never changes its type.
cors <- rep(NA_real_, n_iters)
# seq_len() gives an empty sequence when n_iters is 0, whereas 1:n_iters would
# count down through c(1, 0) and run the body twice.
for (iter_count in seq_len(n_iters)) {
  sample_idx <- sample(1:n_data, n_sample, replace = TRUE)
  sampled_x <- x[sample_idx]
  sampled_y <- y[sample_idx]
  curr_cor <- cor(sampled_x, sampled_y)
  cors[iter_count] <- curr_cor
}
| 204 | + |
| 205 | +# Example |
| 206 | + |
| 207 | +data.file.names <- c( |
| 208 | + 'data-file-1.csv', |
| 209 | + 'data-file-2.csv', |
| 210 | + 'data-file-3.csv', |
| 211 | + 'data-file-4.csv', |
| 212 | + 'data-file-5.csv' |
| 213 | +) |
| 214 | + |
| 215 | +cors <- c() |
| 216 | +for (data.file.name in data.file.names) { |
| 217 | + data.file <- read.csv(data.file.name) |
| 218 | + cors <- c(cors, xycor(data.file)) |
| 219 | +} |
| 220 | + |
| 221 | +### Functions |
| 222 | + |
# Bootstrap the correlation between two paired vectors.
#
# Args:
#   x, y:      Paired vectors of equal length.
#   n_iters:   Number of bootstrap resamples to draw.
#   n_samples: Number of observations drawn (with replacement) per resample.
#
# Returns:
#   A numeric vector of length n_iters holding one correlation per resample.
bootstrap_corr <- function(x, y, n_iters, n_samples) {
  stopifnot(length(x) == length(y))
  # Preallocate instead of growing the result one element at a time.
  cors <- numeric(n_iters)
  # seq_len() yields an empty sequence when n_iters is 0, unlike 1:n_iters.
  for (i in seq_len(n_iters)) {
    # Sample positions from the arguments themselves rather than relying on
    # the globals n_data and n_sample, so that n_samples is honoured and the
    # function works for inputs of any length.
    sample_idx <- sample.int(length(x), n_samples, replace = TRUE)
    cors[i] <- cor(x[sample_idx], y[sample_idx])
  }
  return(cors)
}
| 231 | +cors <- bootstrap_corr(x, y, 100, 50) |
| 232 | +hist(cors) |
| 233 | + |
| 234 | +# Default input values |
# Bootstrap the correlation between two paired vectors, with default settings.
#
# Args:
#   x, y:      Paired vectors of equal length.
#   n_iters:   Number of bootstrap resamples to draw (default 100).
#   n_samples: Number of observations drawn (with replacement) per resample
#              (default 50).
#
# Returns:
#   A numeric vector of length n_iters holding one correlation per resample.
bootstrap_corr <- function(x, y, n_iters = 100, n_samples = 50) {
  stopifnot(length(x) == length(y))
  # Preallocate instead of growing the result one element at a time.
  cors <- numeric(n_iters)
  # seq_len() yields an empty sequence when n_iters is 0, unlike 1:n_iters.
  for (i in seq_len(n_iters)) {
    # Use the function's own arguments, not the globals n_data and n_sample;
    # otherwise the n_samples default above would never take effect.
    sample_idx <- sample.int(length(x), n_samples, replace = TRUE)
    cors[i] <- cor(x[sample_idx], y[sample_idx])
  }
  return(cors)
}
| 243 | +cors <- bootstrap_corr(x, y) |
| 244 | +hist(cors) |
| 245 | + |
| 246 | +# Functions within functions |
| 247 | + |
# Correlation of one random resample (with replacement) of two paired vectors.
#
# Args:
#   x, y:      Paired vectors of equal length.
#   n_samples: Number of observations to draw for the resample.
#
# Returns:
#   The correlation between the resampled x and y values.
subsampled_cor <- function(x, y, n_samples) {
  stopifnot(length(x) == length(y))
  # Draw positions from the arguments themselves rather than the globals
  # n_data and n_sample, so that n_samples is actually used.
  sample_idx <- sample.int(length(x), n_samples, replace = TRUE)
  return(cor(x[sample_idx], y[sample_idx]))
}
| 252 | + |
# Bootstrap a correlation by calling subsampled_cor() once per iteration.
#
# Args:
#   x, y:      Paired vectors, passed through to subsampled_cor().
#   n_iters:   Number of bootstrap resamples to draw (default 100).
#   n_samples: Resample size passed through to subsampled_cor() (default 50).
#
# Returns:
#   A numeric vector of length n_iters, one subsampled_cor() result per
#   iteration.
bootstrap_corr <- function(x, y, n_iters = 100, n_samples = 50) {
  # Preallocate instead of growing the result one element at a time.
  cors <- numeric(n_iters)
  # seq_len() yields an empty sequence when n_iters is 0; 1:n_iters would
  # count down through c(1, 0) and call the helper twice.
  for (i in seq_len(n_iters)) {
    cors[i] <- subsampled_cor(x, y, n_samples) # !!
  }
  return(cors)
}
| 260 | + |
| 261 | +# Local vs global variables |
| 262 | + |
| 263 | +# Local variables do not exist outside functions |
| 264 | +create_txt <- function() { |
| 265 | + txt <- 'abc' |
| 266 | + print(txt) |
| 267 | +} |
| 268 | +create_txt() |
| 269 | +print(txt) |
| 270 | + |
| 271 | +# Local variables can have the same names as global variables |
| 272 | +txt2 <- 'hello' |
| 273 | +change_txt2 <- function() { |
| 274 | + txt2 <- 'goodbye' |
| 275 | + print(txt2) |
| 276 | +} |
| 277 | +print(txt2) |
| 278 | +change_txt2() |
| 279 | +print(txt2) |
| 280 | + |
| 281 | +# <<- can create global variables even within functions |
| 282 | +create_txt3 <- function() { |
| 283 | + txt3 <<- 'abc' |
| 284 | +} |
| 285 | +create_txt3() |
| 286 | +print(txt3) |
0 commit comments