Skip to content

Commit be3df33

Browse files
committed
more updates
1 parent af86621 commit be3df33

9 files changed

+133
-67
lines changed

DESCRIPTION

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Imports:
1414
circlize,
1515
dplyr,
1616
ggplot2,
17+
ggh4x,
1718
MASS,
1819
scales,
1920
progress,
@@ -46,6 +47,7 @@ Depends:
4647
circlize,
4748
dplyr,
4849
ggplot2,
50+
ggh4x,
4951
MASS,
5052
scales,
5153
progress,

R/plot_dot.R

+41-45
Original file line numberDiff line numberDiff line change
@@ -6,95 +6,91 @@
66
#' This function can be used for plotting a single gene expression across
77
#' different groups in a study with complex group design.
88
#'
9-
#' @param seu_obj A complete Seurat object
9+
#' @param seu_obj A complete Seurat object.
1010
#' @param feature Gene name. Only one gene is allowed.
1111
#' @param celltypes Cell types to be included in the dot plot. Default: all cell types.
12-
#' @param groupby The group to show on x axis. One of the column names in meta.data.
12+
#' @param groups The group to show on x axis. One of the column names in meta.data.
1313
#' @param splitby The group to separate the gene expression. One of the column names in meta.data.
14-
#' @param scale.by Methods to scale the dot size. "radius" or "size"
15-
#' @param strip.color Colors for the strip background
14+
#' @param scale.by Methods to scale the dot size. "radius" or "size".
15+
#' @param color.palette Color for gene expression.
16+
#' @param strip.color Colors for the strip background.
17+
#' @param font.size Font size for the labels.
1618
#' @param do.scale Whether or not to scale the dot when percentage expression of the gene is less than 20.
1719
#' @return A ggplot object
1820
#' @export
1921
complex_dotplot_single <- function(
2022
seu_obj,
2123
feature,
2224
celltypes=NULL,
23-
groupby,
25+
groups,
2426
splitby=NULL,
27+
color.palette = NULL,
28+
font.size = 12,
2529
strip.color=NULL,
2630
do.scale=T,
2731
scale.by='radius'
2832
){
29-
groupby_level<-levels(seu_obj@meta.data[,groupby])
30-
if (is.null(groupby_level)){
31-
seu_obj@meta.data[,groupby] <-factor(seu_obj@meta.data[,groupby], levels = names(table(seu_obj@meta.data[,groupby])))
32-
groupby_level<-levels(seu_obj@meta.data[,groupby])
33+
groups_level<-levels(seu_obj@meta.data[,groups])
34+
if (is.null(groups_level)){
35+
seu_obj@meta.data[,groups] <-factor(seu_obj@meta.data[,groups], levels = names(table(seu_obj@meta.data[,groups])))
36+
groups_level<-levels(seu_obj@meta.data[,groups])
3337
}
34-
if (sum(grepl("_", groupby_level))>0){
35-
seu_obj@meta.data[,groupby]<-gsub("_","-",seu_obj@meta.data[,groupby])
36-
groupby_level<-gsub("_","-",groupby_level)
37-
seu_obj@meta.data[,groupby] <-factor(seu_obj@meta.data[,groupby], levels = groupby_level)
38-
}
3938
if(is.null(celltypes)){
4039
celltypes<-levels(seu_obj)
4140
}
42-
seu_obj<-subset(seu_obj,idents=celltypes)
43-
celltypes<-gsub("_", "-", celltypes)
44-
seu_obj@meta.data$celltype<-as.character(seu_obj@active.ident)
45-
seu_obj@meta.data$celltype<-gsub("_", "-", seu_obj@meta.data$celltype)
46-
seu_obj<-SetIdent(seu_obj, value='celltype')
47-
levels(seu_obj)<-celltypes
41+
if(is.null(color.palette)){
42+
color.palette <- colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255)
43+
}
4844
if(!is.null(splitby)){
4945
if (is.null(levels(seu_obj@meta.data[,splitby]))){
5046
seu_obj@meta.data[,splitby] <-factor(seu_obj@meta.data[,splitby], levels = names(table(seu_obj@meta.data[,splitby])))
5147
}
5248
splitby_level<-levels(seu_obj@meta.data[,splitby])
53-
count_df<-extract_gene_count(seu_obj, features = feature, meta.groups = c(groupby,splitby))
54-
count_df$new_group<-paste(count_df[,groupby], count_df[,"celltype"], count_df[,splitby],sep = "___")
55-
exp_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){mean(expm1(x))})
56-
pct_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){length(x[x > 0]) / length(x)})
49+
count_df<-extract_gene_count(seu_obj, features = feature, cell.types = celltypes, meta.groups = c(groups,splitby))
50+
count_df$new_group<-paste(count_df[,groups], count_df[,"celltype"], count_df[,splitby],sep = "___")
51+
exp_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){mean(expm1(x))})
52+
pct_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){length(x[x > 0]) / length(x)}) #This is the same data processing as Seurat
5753
colnames(exp_df)[2]<-"avg.exp"
5854
colnames(pct_df)[2]<-"pct.exp"
5955
data_plot<-merge(exp_df, pct_df, by='new_group')
60-
data_plot$groupby <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[1]]}))
56+
data_plot$groups <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[1]]}))
6157
data_plot$celltype <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[2]]}))
6258
data_plot$splitby <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[3]]}))
63-
data_plot$groupby <- factor(data_plot$groupby, levels = groupby_level)
59+
data_plot$groups <- factor(data_plot$groups, levels = groups_level)
6460
data_plot$splitby <- factor(data_plot$splitby, levels = splitby_level)
6561
data_plot$celltype <- factor(data_plot$celltype, levels = rev(celltypes))
6662
} else {
67-
count_df<-extract_gene_count(seu_obj, features = feature, meta.groups = groupby)
68-
count_df$new_group<-paste(count_df[,groupby], count_df[,"celltype"],sep = "_")
63+
count_df<-extract_gene_count(seu_obj, features = feature, cell.types = celltypes, meta.groups = groups)
64+
count_df$new_group<-paste(count_df[,groups], count_df[,"celltype"],sep = "___")
6965
exp_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){mean(expm1(x))})
7066
pct_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){length(x[x > 0]) / length(x)})
7167
colnames(exp_df)[2]<-"avg.exp"
7268
colnames(pct_df)[2]<-"pct.exp"
7369
data_plot<-merge(exp_df, pct_df, by='new_group')
74-
data_plot$groupby <- as.character(lapply(X=strsplit(data_plot$new_group, split = "_"),FUN = function(x){x[[1]]}))
75-
data_plot$celltype <- as.character(lapply(X=strsplit(data_plot$new_group, split = "_"),FUN = function(x){x[[2]]}))
76-
data_plot$groupby <- factor(data_plot$groupby, levels = groupby_level)
77-
data_plot$celltype <- factor(data_plot$celltype, levels = celltypes)
70+
data_plot$groups <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[1]]}))
71+
data_plot$celltype <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[2]]}))
72+
data_plot$groups <- factor(data_plot$groups, levels = groups_level)
73+
data_plot$celltype <- factor(data_plot$celltype, levels = rev(celltypes))
7874
}
7975
scale.func <- switch(
8076
EXPR = scale.by,
8177
'size' = scale_size,
8278
'radius' = scale_radius,
8379
stop("'scale.by' must be either 'size' or 'radius'")
84-
)
80+
) ### This function is from Seurat https://github.com/satijalab/seurat
8581
data_plot$pct.exp <- round(100 * data_plot$pct.exp, 2)
8682
data_plot$avg.exp <- scale(data_plot$avg.exp)
87-
p<-ggplot(data_plot, aes(y = celltype, x = groupby)) +
83+
p<-ggplot(data_plot, aes(y = celltype, x = groups)) +
8884
geom_tile(fill="white", color="white") +
8985
geom_point(aes( colour=avg.exp, size =pct.exp)) +
90-
scale_color_gradientn(colours = colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255))+
86+
scale_color_gradientn(colours = color.palette )+
9187
theme(panel.background = element_rect(fill = "white", colour = "black"),
92-
axis.text.x = element_text(angle = 45, hjust = 1),
93-
plot.title = element_text(size = 16,hjust = 0.5, face = 'bold'),
94-
axis.text = element_text(size = 12),
95-
axis.title=element_text(size=8),
96-
legend.text=element_text(size=8),
97-
legend.title = element_text(size = 12),
88+
axis.text.x = element_text(angle = 45, hjust = 1, size = font.size),
89+
plot.title = element_text(size = (font.size +2), hjust = 0.5, face = 'bold'),
90+
axis.text = element_text(size = font.size),
91+
legend.text=element_text(size=(font.size-2)),
92+
legend.title = element_text(size = (font.size)),
93+
strip.text = element_text( size = font.size),
9894
legend.position="right")+
9995
ylab("")+xlab("")+ggtitle(feature)
10096
if(do.scale){
@@ -126,15 +122,15 @@ complex_dotplot_single <- function(
126122
#' @param seu_obj A complete Seurat object
127123
#' @param features A vector of gene names.
128124
#' @param celltypes Cell types to be included in the dot plot. Default: all cell types.
129-
#' @param groupby Group ID Must be one of the column names in the meta.data slot of the Seurat object.
125+
#' @param groups Group ID Must be one of the column names in the meta.data slot of the Seurat object.
130126
#' @param strip.color Colors for the strip background
131127
#' @return A ggplot object
132128
#' @export
133129
complex_dotplot_multiple <- function(
134130
seu_obj,
135131
features,
136132
celltypes=NULL,
137-
groupby,
133+
groups,
138134
strip.color = NULL
139135
){
140136
pb <- progress_bar$new(
@@ -143,7 +139,7 @@ complex_dotplot_multiple <- function(
143139
plot_list<-list()
144140
for(i in 1:length(features)){
145141
pp<-invisible(
146-
complex_dotplot_single(seu_obj = seu_obj, feature = features[i], groupby = groupby, celltypes = celltypes)
142+
complex_dotplot_single(seu_obj = seu_obj, feature = features[i], groups = groups, celltypes = celltypes)
147143
)
148144
pp<-pp$data
149145
pp$gene <- features[i]
@@ -155,7 +151,7 @@ complex_dotplot_multiple <- function(
155151
all_data$gene<-factor(all_data$gene, levels = rev(features))
156152
all_data$celltype <- factor(all_data$celltype, levels = levels(seu_obj))
157153
p <- invisible(
158-
ggplot(all_data, aes(x = groupby, y = gene)) +
154+
ggplot(all_data, aes(x = groups, y = gene)) +
159155
geom_tile(fill="white", color="white") +
160156
geom_point(aes( colour=avg.exp, size =pct.exp), alpha=0.9) +
161157
scale_color_gradientn(colours = grDevices::colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255))+

R/plot_violin.R

+57-12
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ complex_vlnplot_single <- function(
4949
gene_count[,allgroups[i]]<-factor(gene_count[,allgroups[i]],
5050
levels = group_level)
5151
}
52-
max_exp<-max(gene_count[,feature])
5352
set.seed(seed = 42)
5453
noise <- rnorm(n = length(x = gene_count[,feature])) / 100000 ## This follows the same data processing for VlnPlot in Seurat
5554
gene_count[, feature]<-gene_count[,feature]+noise
@@ -60,14 +59,14 @@ complex_vlnplot_single <- function(
6059
xlab("") +
6160
ylab("") +
6261
ggtitle(feature)+
63-
theme_bw() +
64-
theme(panel.grid.major = element_blank(),
65-
panel.grid.minor = element_blank(),
62+
theme(panel.background = element_rect(fill = "white",colour = "black"),
63+
axis.title = element_text(size = font.size),
64+
axis.text.x = element_text(size = font.size, angle = 45, hjust = 1, vjust = 1),
65+
axis.text.y = element_text(size=(font.size-2)),
6666
strip.text = element_text( size = font.size),
67-
axis.text.x = element_text(size=(font.size-2), angle = 45, hjust = 1, vjust = 1),
68-
axis.title.y = element_text(size = font.size),
69-
plot.title = element_text(size=font.size, face = "bold", hjust = 0.5),
70-
legend.position = "none")
67+
legend.title = element_blank(),
68+
legend.position = 'none',
69+
plot.title = element_text(size=(font.size),face = "bold", hjust = 0.5))
7170
if(add.dot){
7271
p = p + geom_quasirandom(size=pt.size, alpha=0.2)
7372
}
@@ -82,9 +81,7 @@ complex_vlnplot_single <- function(
8281
}
8382

8483
} else {
85-
if(!is.null(splitby)){
86-
stop("This function does not support spliting multiple groups. Plots will look too messy! Please select one group only in the 'groups' parameter if you want to use 'splitby'.")
87-
}
84+
if(is.null(splitby)){
8885
all_levels<-list()
8986
for(i in 1:length(groups)){
9087
if (is.null(levels(seu_obj@meta.data[,groups[i]]))){
@@ -114,6 +111,55 @@ complex_vlnplot_single <- function(
114111
}
115112
g <- change_strip_background(p, type = 'both', strip.color = strip.color)
116113
print(grid::grid.draw(g))
114+
} else {
115+
count_list<-list()
116+
for(i in 1:length(groups)){
117+
df1<-gene_count[, c(groups[i],splitby[i],feature, "celltype")]
118+
colnames(df1)[1:2]<-c("group","split")
119+
df1$new_group<-groups[i]
120+
count_list[[i]]<-df1
121+
}
122+
new_count<-do.call("rbind", count_list)
123+
new_count$celltype<-factor(new_count$celltype, levels = celltypes)
124+
group_level<-list()
125+
for(i in 1:length(groups)){
126+
if (is.null(levels(seu_obj@meta.data[,groups[i]]))){
127+
seu_obj@meta.data[,groups[i]] <-factor(seu_obj@meta.data[,groups[i]], levels = names(table(seu_obj@meta.data[,groups[i]])))
128+
}
129+
group_level[[i]]<-levels(seu_obj@meta.data[,groups[i]])
130+
}
131+
group_level<-as.character(unlist(group_level))
132+
new_count$group<-factor(new_count$group, levels=group_level)
133+
fill_x1<-grDevices::rainbow(length(groups), alpha = 0.5)
134+
fill_x2<-list()
135+
for(i in 1:length(splitby)){
136+
n_col<-unique(gene_count[, splitby[i]])
137+
fill_x2[[i]]<-scales::hue_pal(l=90)(length(n_col))
138+
}
139+
fill_x2<-as.character(unlist(fill_x2))
140+
fill_x <- c(fill_x1, fill_x2)
141+
fill_y <- scales::hue_pal(l=90)(length(celltypes))
142+
p<- ggplot(new_count, aes_string(x="group", y=feature, fill="group"))+
143+
geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=alpha, color="pink")+
144+
xlab("") + ylab("") + ggtitle(feature) +
145+
theme(panel.background = element_rect(fill = "white",colour = "black"),
146+
axis.title = element_text(size = font.size),
147+
axis.text.x = element_text(size = font.size, angle = 45, hjust = 1, vjust = 1),
148+
axis.text.y = element_text(size=(font.size-2)),
149+
strip.text = element_text( size = font.size),
150+
legend.title = element_blank(),
151+
legend.position = 'none',
152+
plot.title = element_text(size=(font.size),face = "bold", hjust = 0.5)) +
153+
facet_nested(celltype ~ new_group + split, scales = "free_x",
154+
strip = strip_nested( background_x = elem_list_rect(fill = fill_x),
155+
background_y = elem_list_rect(fill = fill_y)))
156+
if(add.dot){
157+
p = p + geom_quasirandom(size=pt.size, alpha=0.2)
158+
print(p)
159+
} else {
160+
p
161+
}
162+
}
117163
}
118164
}
119165

@@ -162,7 +208,6 @@ complex_vlnplot_multiple <- function(
162208
group_level<-levels(seu_obj@meta.data[,group])
163209
gene_count[,group]<-factor(gene_count[,group],levels = group_level)
164210
for(i in 1:length(features)){
165-
max_exp<-max(gene_count[,features[i]])
166211
set.seed(seed = 42)
167212
noise <- rnorm(n = length(x = gene_count[,features[i]])) / 100000 ## This follows the same data processing for VlnPlot in Seurat
168213
gene_count[, features[i]]<-gene_count[,features[i]]+noise

README.md

+20-3
Original file line numberDiff line numberDiff line change
@@ -90,17 +90,34 @@ dev.off()
9090
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_single_split.png) <br />
9191
#### One gene/multiple group factors violin plot:
9292
```
93-
png(filename = 'vlnplot_multiple.png', width = 6, height = 6,units = 'in', res = 200)
93+
png(filename = 'vlnplot_multiple.png', width = 6, height = 6,units = 'in', res = 100)
9494
complex_vlnplot_single(iri.integrated, feature = "Havcr1", groups = c("Group","Replicates"),celltypes = c("PTS1" , "PTS2" , "PTS3" , "NewPT1" , "NewPT2"), font.size = 10)
9595
dev.off()
9696
```
9797
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_multiple.png) <br />
9898

99-
Note that the Replicates group here is for demo purpose. This is not the mouse ID as reported in our original paper.
99+
Each group factor can be further splitted by its own factor by setting the splitby argument. Note that in this case, the order of the group factors needs to match the order of splitby factors. For example:
100+
```
101+
[email protected]$ReplicateID<-plyr::mapvalues([email protected]$Replicates, from = names(table(([email protected]$Replicates))), to = c(rep("Rep1",3),rep("Rep2",3), rep("Rep3",1)))
102+
[email protected]$ReplicateID<-as.character([email protected]$ReplicateID)
103+
104+
png(filename = 'vlnplot_multiple_split.png', width = 7, height = 5,units = 'in', res = 200)
105+
complex_vlnplot_single(iri.integrated, feature = "Havcr1", groups = c("Group","Replicates"),
106+
celltypes = c("PTS1" , "PTS2" , "PTS3" , "NewPT1" , "NewPT2"),
107+
font.size = 10, splitby = c("Phase","ReplicateID"), pt.size=0.05)
108+
dev.off()
109+
110+
### In this example, "Phase" is a splitby factor for "Group" and "ReplicateID" is a splitby factor for "Replicates".
111+
112+
```
113+
114+
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_multiple_split.png) <br />
115+
116+
Note that the Replicates group here is just for showcase purpose. This is not a meaning group ID in our snRNA-seq dataset.
100117

101118
#### Multiple genes/one group factor violin plot:
102119
```
103-
png(filename = 'vlnplot_multiple_genes.png', width = 8, height = 6,units = 'in', res = 300)
120+
png(filename = 'vlnplot_multiple_genes.png', width = 6, height = 6,units = 'in', res = 300)
104121
complex_vlnplot_multiple(iri.integrated, features = c("Havcr1", "Slc34a1", "Vcam1", "Krt20" , "Slc7a13", "Slc5a12"), celltypes = c("PTS1" , "PTS2" , "PTS3" , "NewPT1" , "NewPT2"), group = "Group", add.dot=T, pt.size=0.01, alpha=0.01, font.size = 10)
105122
dev.off()
106123
```

data/vlnplot_multiple_split.png

166 KB
Loading

data/vlnplot_single.png

6 KB
Loading

data/vlnplot_single_split.png

8.36 KB
Loading

man/complex_dotplot_multiple.Rd

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/complex_dotplot_single.Rd

+11-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)