Skip to content

Commit 9b53c4c

Browse files
cereghinoaammd
authored andcommitted
all scripts and latest results
1 parent 6e1fd4e commit 9b53c4c

18 files changed

+3454
-0
lines changed

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,9 @@
22
.Rhistory
33
.RData
44
.Ruserdata
5+
.DS_Store
6+
Old_scripts/*
7+
Previous\ analyses/*
8+
Manuscript/*
9+
eml.xml
10+
trait_full.csv

01_analysis.R

Lines changed: 465 additions & 0 deletions
Large diffs are not rendered by default.

02_analysis_PCA_nullmodels.R

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# rm(list=ls())
2+
require(ade4)
3+
require(geometry)
4+
5+
6+
# Diaz et al 2016 Nature
7+
# ftp://pbil.univ-lyon1.fr/pub/datasets/dray/Diaz_Nature/
8+
9+
## selection of a proportion of data
10+
subselect.data <- function(df, percent = 0.95){
11+
df0 <- scale(df, scale = FALSE)
12+
di <- rowSums(df0^2)
13+
thresh <- quantile(di, percent)
14+
idx <- which(di>thresh)
15+
if(length(idx) > 0)
16+
df <- df[-idx,]
17+
return(df)
18+
}
19+
20+
## permutation models
21+
nullmodel <- function(tab, model){
22+
if(model == 1){
23+
res <- apply(tab, 2, function(x) runif(nrow(tab), min = min(x), max = max(x)))
24+
}
25+
if(model == 2){
26+
res <- scale(matrix(rnorm(ncol(tab) * nrow(tab)), nrow(tab), ncol(tab)))
27+
}
28+
if(model == 3){
29+
res <- apply(tab,2,sample)
30+
}
31+
if(model == 4){
32+
corM <- cor(tab)
33+
res <- scale(scale(matrix(rnorm(ncol(tab) * nrow(tab)), nrow(tab), ncol(tab)))%*%chol(corM))
34+
}
35+
return(res)
36+
}
37+
38+
# load the full dataset
39+
axes.raw <- read.table("Current Results - 0.7.7/RES_pca_individuals_0.7.7_ranks.txt", h = TRUE, row.names = 1)
40+
41+
axes <- axes.raw[,1:4]
42+
head(axes)
43+
dim(axes)
44+
45+
axes<-unique(axes) # Select unique scores # 143
46+
dim(axes)
47+
48+
axes <- scale(axes, center = TRUE, scale = TRUE) # scale for mean zero and standard deviation one
49+
apply(axes, 2 , mean)
50+
apply(axes, 2 , sd)
51+
cor(axes)
52+
53+
axes95 <- subselect.data(axes, 0.95) # select 0.95 of dataset
54+
dim(axes95)
55+
56+
# Compute observed convex hull
57+
blob95 <- convhulln(axes95,"FA")
58+
obs.vol95 <- blob95$vol
59+
obs.vol95
60+
61+
# function for apply the null models "runs" times
62+
run<-function(x, runs, model){
63+
res<-matrix(NA, runs,1)
64+
for(i in 1:runs){
65+
axes_null<-subselect.data(nullmodel(x, model), 0.95) # select 0.95
66+
res[i,] <- convhulln(axes_null,"FA")$vol
67+
}
68+
return(res)
69+
}
70+
71+
# run the null models
72+
res_vol_model_1 <- run(axes, 999, 1)
73+
res_vol_model_2 <- run(axes, 999, 2)
74+
res_vol_model_3 <- run(axes, 999, 3)
75+
76+
# Compute p value manually
77+
(sum(res_vol_model_1<=obs.vol95)+1)/(999+1)
78+
(sum(res_vol_model_2<=obs.vol95)+1)/(999+1)
79+
(sum(res_vol_model_3<=obs.vol95)+1)/(999+1)
80+
81+
# Compute p value automatically
82+
res1 <- as.randtest(obs=obs.vol95,sim=res_vol_model_1[,1], alter="less")
83+
res2 <- as.randtest(obs=obs.vol95,sim=res_vol_model_2[,1], alter="less")
84+
res3 <- as.randtest(obs=obs.vol95,sim=res_vol_model_3[,1], alter="less")
85+
res1
86+
res2
87+
res3
88+
89+
# Ratio between observed volume and null models
90+
Ratio1<-100 - mean(obs.vol95/res_vol_model_1[,1]) * 100
91+
Ratio2<-100 - mean(obs.vol95/res_vol_model_2[,1]) * 100
92+
Ratio3<-100 - mean(obs.vol95/res_vol_model_3[,1]) * 100
93+
Ratio1
94+
Ratio2
95+
Ratio3
96+
# save.image("workspace_PCA_nullmodels_0.7.7")
97+
98+
Res<-matrix(NA,3,6)
99+
Res[1,]<-cbind(res1$obs, res1$expvar[1], res1$expvar[2], res1$expvar[3], res1$pvalue, Ratio1)
100+
Res[2,]<-cbind(res2$obs, res2$expvar[1], res2$expvar[2], res2$expvar[3], res3$pvalue, Ratio2)
101+
Res[3,]<-cbind(res3$obs, res3$expvar[1], res3$expvar[2], res3$expvar[3], res3$pvalue, Ratio3)
102+
colnames(Res)<-c("VolObs", "Std.Obs", "Expectation", "Variance", "pvalue", "Ratio.Obs.Null")
103+
rownames(Res)<-c("Model 1", "Model 2", "Model 3")
104+
Res
105+
106+
setwd("./Current Results - 0.7.7")
107+
# write.table(Res,"results_convhulln_unique_scores_0.7.7_ranks.txt")
108+
# write.table(Res,"results_convhulln_852_scores_0.7.7_ranks.txt")

03_supplemental_figures.R

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
2+
## load libraries and data
3+
4+
library(fwdata)
5+
library(dplyr)
6+
library(ggplot2)
7+
library(ggmap)
8+
library(ggrepel)
9+
10+
# source("PCA_with_NA.R")
11+
12+
library(fwdata)
13+
fw_versions(local = TRUE)
14+
fwd <- fw_data("0.7.7")
15+
16+
17+
SAm <- c(left = -120, bottom = -56, right = -34, top = 30)
18+
# sa_map <- get_stamenmap(SAm, zoom = 3, maptype = "toner")
19+
20+
21+
sa_map <- readRDS("../CESAB-detritivore-predator-dropbox/analysis_output/sa_stamenmap.rds")
22+
23+
sa_map %>%
24+
ggmap() +
25+
geom_count(data = fwd$visits %>%
26+
select(lat = latitude, lon =longitude),
27+
pch = 21, fill = "red", alpha = 0.8)
28+
29+
ggsave("supplemental_figures/map_number_of_visits.png")
30+
31+
# change scale of size to be larger
32+
33+
34+
sa_map %>%
35+
ggmap() +
36+
geom_bin2d(data = fwd$visits %>%
37+
select(lat = latitude, lon =longitude), binwidth = c(1, 1)) +
38+
scale_fill_continuous(high = "red")
39+
40+
41+
42+
# taxa ranked by their number of morphospecies ----------------------------
43+
44+
glimpse(fwd$traits)
45+
46+
# list of species ids in each dataset:
47+
48+
taxa_by_site <- fwd$abundance %>%
49+
select(dataset_id, species_id, bwg_name) %>% distinct %>%
50+
left_join(fwd$traits %>% select(species_id, taxon_level)) %>%
51+
group_by(dataset_id, taxon_level) %>%
52+
tally %>%
53+
mutate(n = n / sum(n))
54+
55+
taxa_by_site$taxon_level %>% unique %>% dput
56+
57+
58+
taxa_num <- 1:11
59+
names(taxa_num) <- c("species_name","genus", "tribe", "subfamily", "family","subord", "ord", "subclass", "class","phylum",
60+
NA)
61+
taxa_num
62+
63+
taxa_by_site %>%
64+
mutate(taxa_num = taxa_num[taxon_level]) %>%
65+
ggplot(aes(x = taxa_num, y = n, group = dataset_id)) + geom_line()
66+
67+
library(ggjoy)
68+
69+
taxa_by_site %>%
70+
ungroup %>%
71+
left_join(fwd$datasets %>% select(dataset_id, name)) %>%
72+
mutate(taxa_num = taxa_num[taxon_level],
73+
taxon_ord = forcats::fct_reorder(taxon_level, taxa_num)) %>%
74+
ggplot(aes(x = taxon_ord, height = n,y = name, group = dataset_id)) +
75+
geom_ridgeline(scale = 3.2, alpha = 0.6) +
76+
theme_minimal(base_size = 14) +
77+
theme(axis.text.y = element_text(vjust = 0),
78+
axis.text.x = element_text(angle = -45, hjust = 0))
79+
80+
ggsave("supplemental_figures/taxonomic_identifications.png")
81+
82+
# table of datasets -------------------------------------------------------
83+
84+
# main characteristics of sampling sites (location, environment) and sampled bromeliads (species identity, sampling effort)
85+
86+
fwd$datasets %>%
87+
select(country, `field site` = location, year)
88+
89+
# bromeliads sampling and species identity
90+
library(tidyr)
91+
library(purrr)
92+
fwd$bromeliads %>% group_by(visit_id) %>%
93+
nest %>%
94+
mutate(start_date = map_chr(data, ~.x$collection_date %>% min(na.rm = TRUE) %>% as.character),
95+
end_date = map_chr(data, ~.x$collection_date %>% max(na.rm = TRUE) %>% as.character),
96+
n_broms = map_dbl(data, nrow),
97+
brom_spp = map_chr(data, ~ unique(.x$species) %>% paste0(collapse = "; "))) %>%
98+
# drop the `data` column
99+
keep(is_atomic) %>%
100+
left_join(fwd$visits %>% select(visit_id, latitude, longitude)) %>%
101+
readr::write_csv("supplemental_figures/visit_information_table.csv")
102+

04_adonis.R

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
2+
# Andrew & Regis
3+
# Nov 2017
4+
# Doing an adonis analysis of the positions of the animals on the first princiapal component axes
5+
6+
7+
# load data ---------------------------------------------------------------
8+
9+
10+
library(fwdata)
11+
library(plotly)
12+
library(tidyverse)
13+
library(vegan)
14+
15+
16+
17+
# define functions --------------------------------------------------------
18+
19+
calculate_adonis_taxo_level <- function(taxon, .taxa_below_taxon, .first_four_axes){
20+
21+
# join the taxa with enough information for an adonis with the first four PCAs
22+
below_taxon_complete <- .taxa_below_taxon %>%
23+
select(species_id, taxon_name, taxon_level, family, ord) %>%
24+
left_join(.first_four_axes) %>%
25+
drop_na(Axis.1)
26+
27+
# axis scores
28+
axis_values <- as.matrix(below_taxon_complete[,c("Axis.1", "Axis.2", "Axis.3", "Axis.4")])
29+
30+
ff <- sprintf("axis_values ~ %s", taxon)
31+
res <- adonis(as.formula(ff), data = below_taxon_complete, method = "euclidian")
32+
33+
return(res)
34+
35+
}
36+
37+
# load the full dataset This is the very last version of the PCA -- the species
38+
# scores on axes 1 and 4 of the PCA according to the most recent data version.
39+
axes.raw <- read.table("Current Results - 0.7.7/RES_pca_individuals_0.7.7.txt",
40+
header = TRUE, row.names = 1)
41+
42+
fwd <- fw_data("0.7.7")
43+
str(fwd, max.level = 1)
44+
45+
46+
first_four_axes <- axes.raw %>%
47+
rownames_to_column("species_id") %>%
48+
select(species_id, Axis.1:Axis.4)
49+
50+
# put in the taxonomic information
51+
52+
# first filter for all taxa identified to the level of below family: everything
53+
# which was either genus or species
54+
55+
glimpse(fwd$traits)
56+
57+
# each taxonomic level has a little number to make it easy to filter by "rank of
58+
# taxonomic group"
59+
60+
61+
# hey so what is the distribution of lowest taxonomic levels anyway?
62+
63+
fwd$traits %>%
64+
group_by(taxon_number, taxon_level) %>%
65+
tally %>%
66+
arrange(taxon_number)
67+
68+
69+
# Give me every morphospecies which was identified below family
70+
taxa_below_family <- fwd$traits %>%
71+
select(species_id:taxon_number) %>%
72+
filter(!(taxon_number <= 9))
73+
74+
75+
76+
# to conduct an adonis we need to put this info together with the axis scores:
77+
78+
79+
taxa_below_family %>%
80+
select(species_id, taxon_name, taxon_level, family) %>%
81+
left_join(first_four_axes) %>%
82+
ggplot(aes(x = Axis.1, y = Axis.2, colour = family))+ geom_point() + guides(colour = FALSE)
83+
84+
# # Why is there missing data?
85+
# at_least_to_genera %>%
86+
# select(species_id, taxon_name, taxon_level, genus) %>%
87+
# left_join(first_four_axes) %>%
88+
# visdat::vis_miss(.)
89+
#
90+
# # Who are these sad animals?
91+
# at_least_to_genera %>%
92+
# select(species_id, taxon_name, taxon_level, genus) %>%
93+
# left_join(first_four_axes) %>%
94+
# filter(is.na(Axis.1))
95+
96+
genus_adonis <- calculate_adonis_taxo_level("family", taxa_below_family, first_four_axes)
97+
98+
summary(genus_adonis)
99+
genus_adonis
100+
101+
102+
103+
# order level -------------------------------------------------------------
104+
105+
106+
# Give me every morphospecies which was identified below order
107+
taxa_below_order <- fwd$traits %>%
108+
select(species_id:taxon_number) %>%
109+
filter(!(taxon_number <= 8 ))
110+
111+
order_adonis <- calculate_adonis_taxo_level("ord", taxa_below_order, first_four_axes)
112+
113+
summary(order_adonis)
114+
order_adonis
115+
116+
117+
118+
119+
# 3D plots of the dots ----------------------------------------------------
120+
121+
family_axis <- taxa_below_family %>%
122+
select(species_id, taxon_name, taxon_level, family) %>%
123+
left_join(first_four_axes)
124+
125+
126+
127+
family_axis %>% glimpse %>%
128+
plot_ly(x = ~ Axis.1, y = ~Axis.2, z = ~Axis.3) %>%
129+
add_markers(color = ~family)
130+
131+
132+
family_axis %>% glimpse %>%
133+
plot_ly(x = ~ Axis.2, y = ~Axis.3, z = ~Axis.4) %>%
134+
add_markers(color = ~family)

04_adonis.html

Lines changed: 496 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)