SrivastavaLab
diff --git a/‎.gitignore‎
Lines changed: 6 additions & 0 deletions b/‎.gitignore‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎01_analysis.R‎
Lines changed: 465 additions & 0 deletions b/‎01_analysis.R‎
Lines changed: 465 additions & 0 deletions
diff --git a/‎02_analysis_PCA_nullmodels.R‎
Lines changed: 108 additions & 0 deletions b/‎02_analysis_PCA_nullmodels.R‎
Lines changed: 108 additions & 0 deletions
diff --git a/‎03_supplemental_figures.R‎
Lines changed: 102 additions & 0 deletions b/‎03_supplemental_figures.R‎
Lines changed: 102 additions & 0 deletions
diff --git a/‎04_adonis.R‎
Lines changed: 134 additions & 0 deletions b/‎04_adonis.R‎
Lines changed: 134 additions & 0 deletions
diff --git a/‎04_adonis.html‎
Lines changed: 496 additions & 0 deletions b/‎04_adonis.html‎
Lines changed: 496 additions & 0 deletions
@@ -2,3 +2,9 @@
 .Rhistory
 .RData
 .Ruserdata
+.DS_Store
+Old_scripts/*
+Previous\ analyses/*
+Manuscript/*
+eml.xml
+trait_full.csv
@@ -0,0 +1,108 @@
+# rm(list=ls())
+require(ade4)
+require(geometry)
+
+
+# Diaz et al 2016 Nature
+# ftp://pbil.univ-lyon1.fr/pub/datasets/dray/Diaz_Nature/
+
+## Selection of a proportion of data
+##
+## Discards the rows lying farthest from the column centroid.
+##
+## df      : numeric matrix or data frame, one row per observation.
+## percent : quantile (between 0 and 1) of the squared distances to the
+##           centroid; rows strictly above that quantile are removed.
+## Returns `df` without the removed rows. The result always keeps its two
+## dimensions, even when a single row or a single column is left.
+subselect.data <- function(df, percent = 0.95){
+  # centre the columns so that squared row norms are squared distances
+  # to the centroid
+  df0 <- scale(df, scale = FALSE)
+  di <- rowSums(df0^2)
+  thresh <- quantile(di, percent)
+  idx <- which(di > thresh)
+  if (length(idx) > 0) {
+    # drop = FALSE: otherwise a one-column input (or a single surviving row)
+    # silently collapses to a plain vector
+    df <- df[-idx, , drop = FALSE]
+  }
+  return(df)
+}
+
+## Permutation models
+##
+## Simulates one null data set with the same dimensions as `tab`.
+##
+## tab   : numeric matrix (rows = observations, columns = axes).
+## model : which null model to draw from:
+##         1 = each column drawn uniformly between its observed min and max
+##         2 = independent standard normal columns, then centred and scaled
+##         3 = each column of `tab` permuted independently
+##         4 = normal columns given the correlation structure of `tab`
+##             (via the Cholesky factor of cor(tab)), then centred and scaled
+## Returns a matrix with nrow(tab) rows and ncol(tab) columns.
+## Stops with an explicit message when `model` is not one of 1, 2, 3 or 4.
+nullmodel <- function(tab, model){
+  # fail early and clearly: previously an unknown model left `res` undefined
+  # and surfaced as "object 'res' not found"
+  if (length(model) != 1 || !(model %in% 1:4)) {
+    stop("`model` must be one of 1, 2, 3 or 4", call. = FALSE)
+  }
+  n <- nrow(tab)
+  p <- ncol(tab)
+  if (model == 1) {
+    res <- apply(tab, 2, function(x) runif(n, min = min(x), max = max(x)))
+  } else if (model == 2) {
+    res <- scale(matrix(rnorm(p * n), n, p))
+  } else if (model == 3) {
+    res <- apply(tab, 2, sample)
+  } else {
+    corM <- cor(tab)
+    res <- scale(scale(matrix(rnorm(p * n), n, p)) %*% chol(corM))
+  }
+  return(res)
+}
+
+# Read the PCA scores of the full dataset (first column holds the row names)
+axes.raw <- read.table(
+  "Current Results - 0.7.7/RES_pca_individuals_0.7.7_ranks.txt",
+  header = TRUE,
+  row.names = 1
+)
+
+# Keep the first four columns only
+axes <- axes.raw[, 1:4]
+head(axes)
+dim(axes)
+
+# Collapse duplicated score combinations # 143
+axes <- unique(axes)
+dim(axes)
+
+# Standardise every axis to mean zero and standard deviation one,
+# then check the result
+axes <- scale(axes, center = TRUE, scale = TRUE)
+apply(axes, 2, mean)
+apply(axes, 2, sd)
+cor(axes)
+
+# Retain the 0.95 of the dataset closest to the centroid
+axes95 <- subselect.data(axes, 0.95)
+dim(axes95)
+
+# Observed convex hull ("FA" asks convhulln for area and volume)
+blob95 <- convhulln(axes95, "FA")
+obs.vol95 <- blob95$vol
+obs.vol95
+
+# Apply a null model `runs` times and record the convex hull volume each time
+#
+# x     : matrix of axis scores handed to nullmodel()
+# runs  : number of null data sets to simulate (0 yields an empty result)
+# model : null model identifier handed to nullmodel()
+# Returns a `runs` x 1 matrix holding one hull volume per simulation.
+run <- function(x, runs, model){
+  res <- matrix(NA_real_, runs, 1)
+  # seq_len() rather than 1:runs, which would iterate over c(1, 0) for runs = 0
+  for (i in seq_len(runs)) {
+    # trim each simulated data set the same way as the observed one (0.95)
+    axes_null <- subselect.data(nullmodel(x, model), 0.95)
+    res[i, ] <- convhulln(axes_null, "FA")$vol
+  }
+  return(res)
+}
+
+# run the null models
+n_runs <- 999
+res_vol_model_1 <- run(axes, n_runs, 1)
+res_vol_model_2 <- run(axes, n_runs, 2)
+res_vol_model_3 <- run(axes, n_runs, 3)
+
+# Compute p value manually: share of simulated volumes that are at most the
+# observed one, counting the observation itself (+1 in both terms)
+(sum(res_vol_model_1 <= obs.vol95) + 1) / (n_runs + 1)
+(sum(res_vol_model_2 <= obs.vol95) + 1) / (n_runs + 1)
+(sum(res_vol_model_3 <= obs.vol95) + 1) / (n_runs + 1)
+
+# Compute p value automatically
+res1 <- as.randtest(obs = obs.vol95, sim = res_vol_model_1[, 1], alter = "less")
+res2 <- as.randtest(obs = obs.vol95, sim = res_vol_model_2[, 1], alter = "less")
+res3 <- as.randtest(obs = obs.vol95, sim = res_vol_model_3[, 1], alter = "less")
+res1
+res2
+res3
+
+#  Ratio between observed volume and null models
+Ratio1 <- 100 - mean(obs.vol95 / res_vol_model_1[, 1]) * 100
+Ratio2 <- 100 - mean(obs.vol95 / res_vol_model_2[, 1]) * 100
+Ratio3 <- 100 - mean(obs.vol95 / res_vol_model_3[, 1]) * 100
+Ratio1
+Ratio2
+Ratio3
+# save.image("workspace_PCA_nullmodels_0.7.7")
+
+# One summary row per model. Building every row with the same helper keeps the
+# test object and the ratio paired; the hand-written version reported the
+# p value of model 3 in the row of model 2.
+randtest_row <- function(test, ratio) {
+  c(test$obs, test$expvar[1], test$expvar[2], test$expvar[3], test$pvalue, ratio)
+}
+Res <- rbind(
+  randtest_row(res1, Ratio1),
+  randtest_row(res2, Ratio2),
+  randtest_row(res3, Ratio3)
+)
+colnames(Res) <- c("VolObs", "Std.Obs", "Expectation", "Variance", "pvalue", "Ratio.Obs.Null")
+rownames(Res) <- c("Model 1", "Model 2", "Model 3")
+Res
+
+# Write results through an explicit path instead of setwd(): changing the
+# working directory made the relative read.table() path at the top of this
+# script fail when the script was run again in the same session.
+results_dir <- "Current Results - 0.7.7"
+# write.table(Res, file.path(results_dir, "results_convhulln_unique_scores_0.7.7_ranks.txt"))
+# write.table(Res, file.path(results_dir, "results_convhulln_852_scores_0.7.7_ranks.txt"))
@@ -0,0 +1,102 @@
+
+## load libraries and data
+
+library(fwdata)
+library(dplyr)
+library(ggplot2)
+library(ggmap)
+library(ggrepel)
+
+# source("PCA_with_NA.R")
+
+library(fwdata)
+# list the locally available versions, then load release 0.7.7
+fw_versions(local = TRUE)
+fwd <- fw_data("0.7.7")
+
+
+# bounding box of the map
+SAm <- c(left = -120, bottom = -56, right = -34, top = 30)
+# sa_map <- get_stamenmap(SAm, zoom = 3, maptype = "toner")
+
+
+# the base map is read from a saved copy rather than downloaded
+sa_map <- readRDS("../CESAB-detritivore-predator-dropbox/analysis_output/sa_stamenmap.rds")
+
+# coordinates of every visit, named the way the map layers below use them
+visit_coords <- select(fwd$visits, lat = latitude, lon = longitude)
+
+# map 1: one point per location, sized by the number of visits
+ggmap(sa_map) +
+  geom_count(
+    data = visit_coords,
+    pch = 21,
+    fill = "red",
+    alpha = 0.8
+  )
+
+ggsave("supplemental_figures/map_number_of_visits.png")
+
+# change scale of size to be larger
+
+
+# map 2: visits counted in 1 x 1 bins
+ggmap(sa_map) +
+  geom_bin2d(data = visit_coords, binwidth = c(1, 1)) +
+  scale_fill_continuous(high = "red")
+
+
+
+# taxa ranked by their number of morphospecies ----------------------------
+
+glimpse(fwd$traits)
+
+# list of species ids in each dataset:
+
+# one row per morphospecies recorded in a dataset
+morphospecies <- fwd$abundance %>%
+  select(dataset_id, species_id, bwg_name) %>%
+  distinct()
+
+# taxonomic level of each species
+species_levels <- fwd$traits %>%
+  select(species_id, taxon_level)
+
+# proportion of each dataset's morphospecies found at every taxonomic level
+# (tally() leaves the data grouped by dataset_id, so sum(n) is per dataset)
+taxa_by_site <- morphospecies %>%
+  left_join(species_levels) %>%
+  group_by(dataset_id, taxon_level) %>%
+  tally() %>%
+  mutate(n = n / sum(n))
+
+dput(unique(taxa_by_site$taxon_level))
+
+
+# numeric position of every taxonomic level, looked up by name below
+taxa_num <- setNames(
+  1:11,
+  c("species_name", "genus", "tribe", "subfamily", "family", "subord", "ord",
+    "subclass", "class", "phylum", NA)
+)
+taxa_num
+
+# one line per dataset across the taxonomic levels
+taxa_by_site %>%
+  mutate(taxa_num = taxa_num[taxon_level]) %>%
+  ggplot(aes(x = taxa_num, y = n, group = dataset_id)) +
+  geom_line()
+
+library(ggjoy)
+
+# ridgeline version: one ridge per dataset, labelled with the dataset name
+dataset_names <- fwd$datasets %>%
+  select(dataset_id, name)
+
+taxa_by_site %>%
+  ungroup() %>%
+  left_join(dataset_names) %>%
+  mutate(taxa_num = taxa_num[taxon_level],
+         taxon_ord = forcats::fct_reorder(taxon_level, taxa_num)) %>%
+  ggplot(aes(x = taxon_ord, height = n, y = name, group = dataset_id)) +
+  geom_ridgeline(scale = 3.2, alpha = 0.6) +
+  theme_minimal(base_size = 14) +
+  theme(axis.text.y = element_text(vjust = 0),
+        axis.text.x = element_text(angle = -45, hjust = 0))
+
+ggsave("supplemental_figures/taxonomic_identifications.png")
+
+# table of datasets -------------------------------------------------------
+
+# main characteristics of sampling sites (location, environment) and sampled bromeliads (species identity, sampling effort)
+
+fwd$datasets %>% 
+  select(country, `field site` = location, year)
+
+# bromeliads sampling and species identity
+library(tidyr)
+library(purrr)
+# one row per visit: first and last collection date, number of bromeliads
+# and the bromeliad species sampled, joined to the visit coordinates
+fwd$bromeliads %>% group_by(visit_id) %>% 
+  nest %>% 
+  mutate(start_date = map_chr(data, ~.x$collection_date %>% min(na.rm = TRUE) %>% as.character),
+         end_date = map_chr(data, ~.x$collection_date %>% max(na.rm = TRUE) %>% as.character),
+         n_broms = map_dbl(data, nrow),
+         brom_spp = map_chr(data, ~ unique(.x$species) %>% paste0(collapse = "; "))) %>% 
+  # drop the `data` list-column by name; this replaces keep(is_atomic), which
+  # removed it indirectly through a predicate re-exported from rlang
+  select(-data) %>% 
+  left_join(fwd$visits %>% select(visit_id, latitude, longitude)) %>% 
+  readr::write_csv("supplemental_figures/visit_information_table.csv")
+
@@ -0,0 +1,134 @@
+
+# Andrew & Regis
+# Nov 2017
+# Doing an adonis analysis of the positions of the animals on the first principal component axes
+
+
+# load data ---------------------------------------------------------------
+
+
+library(fwdata)
+library(plotly)
+library(tidyverse)
+library(vegan)
+
+
+
+# define functions --------------------------------------------------------
+
+# Run an adonis (permutational MANOVA) of the first four PCA axes against one
+# taxonomic grouping.
+#
+# taxon             : name of the grouping column used on the right-hand side
+#                     of the formula, as a string (e.g. "family" or "ord")
+# .taxa_below_taxon : data frame with at least species_id, taxon_name,
+#                     taxon_level, family and ord
+# .first_four_axes  : data frame with species_id and Axis.1 to Axis.4
+# Returns the object produced by vegan::adonis().
+calculate_adonis_taxo_level <- function(taxon, .taxa_below_taxon, .first_four_axes){
+  
+  # join the taxa with enough information for an adonis with the first four PCAs
+  # (species without a score on Axis.1 are dropped)
+  below_taxon_complete <- .taxa_below_taxon %>% 
+    select(species_id, taxon_name, taxon_level, family, ord) %>% 
+    left_join(.first_four_axes) %>% 
+    drop_na(Axis.1)
+  
+  # axis scores
+  axis_values <- as.matrix(below_taxon_complete[,c("Axis.1", "Axis.2", "Axis.3", "Axis.4")])
+  
+  # the formula is built here so that its environment is this function's,
+  # where `axis_values` lives
+  ff <- sprintf("axis_values ~ %s", taxon)
+  # "euclidean" is the spelling vegan uses for this distance; the former
+  # "euclidian" is not a prefix of it and so cannot be matched to it
+  res <- adonis(as.formula(ff), data = below_taxon_complete, method = "euclidean")
+
+  return(res)
+  
+}
+
+# Load the most recent version of the PCA: the species scores on axes 1 to 4,
+# computed from the most recent data version.
+axes.raw <- read.table(
+  "Current Results - 0.7.7/RES_pca_individuals_0.7.7.txt",
+  header = TRUE,
+  row.names = 1
+)
+
+fwd <- fw_data("0.7.7")
+str(fwd, max.level = 1)
+
+
+# move the row names into a species_id column and keep Axis.1 to Axis.4
+first_four_axes <- select(
+  rownames_to_column(axes.raw, "species_id"),
+  species_id, Axis.1:Axis.4
+)
+
+# put in the taxonomic information
+
+# first filter for all taxa identified to the level of below family: everything
+# which was either genus or species
+
+glimpse(fwd$traits)
+
+# each taxonomic level has a little number to make it easy to filter by "rank of
+# taxonomic group"
+
+
+# hey so what is the distribution of lowest taxonomic levels anyway?
+
+fwd$traits %>%
+  group_by(taxon_number, taxon_level) %>% 
+  tally %>% 
+  arrange(taxon_number)
+
+
+# Give me every morphospecies which was identified below family 
+# (rows whose taxon_number is NA are dropped by filter() as well)
+taxa_below_family <- fwd$traits %>% 
+  select(species_id:taxon_number) %>% 
+  filter(!(taxon_number <= 9))
+
+
+
+# to conduct an adonis we need to put this info together with the axis scores:
+
+
+# guides(colour = "none") hides the legend; the logical form
+# guides(colour = FALSE) is deprecated in ggplot2
+taxa_below_family %>% 
+  select(species_id, taxon_name, taxon_level, family) %>% 
+  left_join(first_four_axes) %>% 
+  ggplot(aes(x  = Axis.1, y = Axis.2, colour = family))+ geom_point() + guides(colour = "none")
+
+# # Why is there missing data? 
+# at_least_to_genera %>% 
+#   select(species_id, taxon_name, taxon_level, genus) %>% 
+#   left_join(first_four_axes) %>% 
+#   visdat::vis_miss(.)
+# 
+# # Who are these sad animals? 
+# at_least_to_genera %>% 
+#   select(species_id, taxon_name, taxon_level, genus) %>% 
+#   left_join(first_four_axes) %>% 
+#   filter(is.na(Axis.1))
+
+# NOTE(review): despite its name, `genus_adonis` holds the test with `family`
+# as the grouping term; the name is kept so existing references keep working
+genus_adonis <- calculate_adonis_taxo_level("family", taxa_below_family, first_four_axes)
+
+summary(genus_adonis)
+genus_adonis
+
+
+
+# order level -------------------------------------------------------------
+
+
+# Morphospecies identified below order: taxon_number strictly above 8
+# (rows with a missing taxon_number are dropped too)
+trait_taxonomy <- select(fwd$traits, species_id:taxon_number)
+taxa_below_order <- filter(trait_taxonomy, taxon_number > 8)
+
+# adonis of the first four axes with `ord` as the grouping term
+order_adonis <- calculate_adonis_taxo_level(
+  "ord",
+  taxa_below_order,
+  first_four_axes
+)
+
+summary(order_adonis)
+order_adonis
+
+
+
+
+# 3D plots of the dots ----------------------------------------------------
+
+# axis scores of every taxon identified below family, with its family
+family_axis <- left_join(
+  select(taxa_below_family, species_id, taxon_name, taxon_level, family),
+  first_four_axes
+)
+
+
+
+# axes 1-3, coloured by family
+glimpse(family_axis)
+add_markers(
+  plot_ly(family_axis, x = ~ Axis.1, y = ~Axis.2, z = ~Axis.3),
+  color = ~family
+)
+
+
+# axes 2-4, coloured by family
+glimpse(family_axis)
+add_markers(
+  plot_ly(family_axis, x = ~ Axis.2, y = ~Axis.3, z = ~Axis.4),
+  color = ~family
+)