-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathUtilities.R
467 lines (443 loc) · 17.1 KB
/
Utilities.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
#' effector_significance
#'
#' @param eff.m.param table with scale parameters for gene ranking
#' @param Desc description table of the genes
#' @param j.nB predictive model based on Naives Bayes
#' @param sample.l sample name
#' @nGenes number of genes in hub gene ranking
#' @description define the ranking of the effector genes
#' @return gene.rank.e.description table of ranking and descriptions
effector_significance <- function(eff.m.param, Desc, j.nB, sample.l){
tryCatch(
expr = {
prob.pred <- suppressWarnings(predict(j.nB, type="prob",
newdata = eff.m.param,
threshold = 0.01))
gene.rank.eff <- (prob.pred$posterior)[,2]
#
gene.rank.eff = gene.rank.eff[order(gene.rank.eff, decreasing=TRUE)]
gene.rank.eff = as.data.frame(gene.rank.eff)
# merge description and ranking
gene.rank.e.description <- left_join(rownames_to_column(gene.rank.eff),
rownames_to_column(Desc),
by = ("rowname" = "rowname"))
gene.rank.e.description <- as.data.frame(gene.rank.e.description)
colnames(gene.rank.eff)[1] <- sample.l
return(gene.rank.e.description)
},
error = function(e){
message("NECorr error in the significance")
message(e)
},
warning = function(w){
message("NECorr warning in the significance ranking of the effectors")
message(w)
},
finally = {
}
)
}
#' hub_edge_significance
#'
#' @param network.int network file
#' @param gene.rank.hash gene ranking hash
#' @description define the hub edge ranking
#' @return res
hub_edge_significance <- function(network.int=network.int, gene.rank.hash=gene.rank.hash){
tryCatch(
expr = {
sourceIDs <- as.vector(network.int[,1])
targetIDs <- as.vector(network.int[,2])
ranks.sum <- rep(1,length(targetIDs))
for(i in 1:nrow(network.int)){
ranks.sum[i] <- sum(gene.rank.hash[[sourceIDs[i]]], gene.rank.hash[[targetIDs[i]]])
}
hub.int.ranks <- data.frame(sourceIDs,targetIDs,ranks.sum)
break.points <- c(-Inf, unique(sort(as.numeric(hub.int.ranks[,3]))), Inf)
p2 <- cut( as.numeric(hub.int.ranks[,3]), breaks=break.points, labels=FALSE )
p2 <- 1 - p2/length(break.points)
hub.int.ranks <- as.data.frame(cbind(hub.int.ranks,p2))
hub.int.ranks$p2 <- as.numeric(as.character(hub.int.ranks$p2))
res <- hub.int.ranks
return(res)
},
error = function(e){
message("Error in the hub significance calculation:")
message(e)
},
warning = function(w){
message("Warning in hub significance calculation:")
message(w)
},
finally = {}
)
}
#' effector_edge_significance
#'
#' @param network.int network data
#' @param gene.rank.eff.hash gene ranking for the effector in a hash
#' @description define the edge ranking around the effectors
#' @return res
effector_edge_significance <- function(network.int=network.int,
gene.rank.eff=gene.rank.eff, nGenes=nGenes){
tryCatch(
expr = {
targetIDs <- as.vector(network.int[,2])
eff_ranks.sum <- rep(1,length(targetIDs))
gene.rank.eff.hash <- hash()
gene.rank.eff = as.data.frame(gene.rank.eff)
geneIDs <- row.names(gene.rank.eff)
for (i in 1:nGenes){
geneRank <- as.numeric(gene.rank.eff[i,1])
gene.rank.eff.hash[[geneIDs[i]]] <- geneRank
}
sourceIDs <- as.vector(network.int[,1])
targetIDs <- as.vector(network.int[,2])
eff_ranks.sum <- rep(1,length(targetIDs))
for(i in 1:nrow(network.int)){
eff_ranks.sum[i] <- sum(gene.rank.eff.hash[[sourceIDs[i]]], gene.rank.eff.hash[[targetIDs[i]]])
}
eff.int.ranks <- data.frame(sourceIDs,targetIDs,eff_ranks.sum)
break.points <- c(-Inf, unique(sort(as.numeric(eff.int.ranks[,3]))), Inf)
p2 <- cut( as.numeric(eff.int.ranks[,3]), breaks=break.points, labels=FALSE )
p2 <- 1 - p2/length(break.points)
eff.int.ranks <- as.data.frame(cbind(eff.int.ranks,p2))
eff.int.ranks$p2 <- as.numeric(as.character(eff.int.ranks$p2))
res <- eff.int.ranks
return(res)
},
error = function(e){
message("Error in effector edge significance:")
message(e)
},
warning = function(w){
message("Warnings in effector edge significance:")
message(w)
},
finally = {
}
)
}
#' activator_significant
#'
#' @param hub.int.significant significance of the hub genes
#' @param network.int network edges
#' @Desc description file genes and gene names
#' @description define the activator significance
#' @return res
activator_significant <- function(hub.int.significant = hub.int.significant,
network.int = network.int,
Desc = Desc){
tryCatch(
expr = {
# find genes that are significant in the hub subnetwork in the complete network
sig.hub <- unique(c(as.character(hub.int.significant$sourceIDs),
as.character(hub.int.significant$targetIDs)))
sc.sig.hub <- subset(network.int, network.int[,1] %in% sig.hub)
tg.sig.hub <- subset(network.int, network.int[,2] %in% sig.hub)
net.extension.sig.hub <- rbind(sc.sig.hub,tg.sig.hub)
g <- graph.data.frame(net.extension.sig.hub, directed = T)
## calculate the pagerank known as regulator
pagerank<-page.rank(g)$vector
# degree centrality
degree<-degree(g, v=V(g), mode = "total",normalized = T)
# out degree
degree_out<-degree(g, v=V(g), mode = "out")
# preparation to transform the network statistics in a table
gene_name<-V(g)$name
# bind into matrices
datagrid <- data.frame(I(gene_name), pagerank, degree, degree_out)
cc <- c("gene_name", "pagerank", "EdgeCount", "degree_out")
colnames(datagrid) <- cc
rownames(datagrid) <- gene_name
datagrid <- datagrid[order(datagrid$pagerank, decreasing = T),]
datagrid$pagerank <- ScalN(datagrid$pagerank)
gene.rank.act.description <- left_join(rownames_to_column(datagrid),
rownames_to_column(Desc),
by = ("rowname" = "rowname"))
rownames(gene.rank.act.description) <- gene.rank.act.description$rowname
gene.rank.act.description <- gene.rank.act.description[, -c(1,2)]
res <- gene.rank.act.description
return(res)
},
error = function(e){
message("Error in activator significant search:")
message(e)
},
warning = function(w){
message("Warning in activator significant search:")
message(w)
},
finally = {
}
)
}
#' linked_act_hub_net
#'
#' @param hub.int.significant significant network of the hub genes
#' @param gene.rank.act.significant the ranking of the activator genes
#' @param network.int full gene network
#' @return act.net
linked_act_hub_net <- function(hub.int.significant=hub.int.significant,
gene.rank.act.significant=gene.rank.act.significant,
network.int=network.int){
tryCatch(
expr = {
sig.hub <- unique(c(as.character(hub.int.significant[,1]),
as.character(hub.int.significant[,2])))
sc.sig.hub <- subset(network.int, network.int[,1] %in% sig.hub)
tg.sig.hub <- subset(network.int, network.int[,2] %in% sig.hub)
net.extension.sig.hub <- rbind(sc.sig.hub,tg.sig.hub)
act.net.1 <- subset(
net.extension.sig.hub,
net.extension.sig.hub[,1] %in% as.vector(as.character(gene.rank.act.significant[,1])))
act.net.2 <- subset(
net.extension.sig.hub,
net.extension.sig.hub[,2] %in% as.vector(as.character(gene.rank.act.significant[,1])))
act.net.pre <- rbind(act.net.1,act.net.2)
meanSig <- mean(as.vector(as.numeric(as.character(hub.int.significant[,3]))))
act.net <- cbind(act.net.pre,
rep(meanSig,nrow(act.net.pre)),
rep("act",nrow(act.net.pre)))
colnames(act.net) <- c("source","target","score","node.type")
return(act.net)
},
error = function(e){
message("Error in linking hub genes significant and activator significant genes:")
message(e)
},
warning = function(w){
message("Warning in linking hub genes significant and activator significant genes:")
message(w)
},
finally = {
}
)
}
#' linked_eff_hub_net
#'
#' @param hub.int.significant significant network of the hub genes
#' @param eff.int.significant significant network of the effector genes
#'
#' @return eff.net
linked_eff_hub_net <- function(hub.int.significant=hub.int.significant,
eff.int.significant=eff.int.significant){
tryCatch(
expr = {
sig.hub <- unique(c(as.character(hub.int.significant[,1]),
as.character(hub.int.significant[,2])))
sc.sig.eff <- subset(eff.int.significant, eff.int.significant[,1] %in% sig.hub)
tg.sig.eff <- subset(eff.int.significant, eff.int.significant[,2] %in% sig.hub)
eff.net.pre <- rbind(sc.sig.eff,tg.sig.eff)
eff.net <- cbind(eff.net.pre[,1],eff.net.pre[,2],eff.net.pre[,3], rep(nrow(eff.net.pre)))
return(eff.net)
},
error = function(e){
message("Error in linking hub genes significant and effector significant genes:")
message(e)
},
warning = function(w){
message("Warning in linking hub genes significant and effector significant genes:")
message(w)
},
finally = {
}
)
}
#' indexing.network
#' @param tab expression table
#' @param network network needed to be indexed
#' @return netindex
#' @export
indexing.network <- function (tab,network) {
tryCatch(
expr = {
# create table with all the gene in network
t <- as.data.frame(unique(apply(network,1, function(x) c(x[1],x[2]))))
tt1 <- t(t)
#replace the gene name by their index in expression table
indexing <- function (x,y) {
if ( length(which(rownames(y) == x[1])) != 0 && length(which(rownames(y) == x[2])) != 0) {
c(which(rownames(y) == x[1]),which(rownames(y) == x[2]))
}
}
netindex <- do.call(rbind,apply(tt1, 1, indexing,tab))
return(netindex)
},
error = function(e){
message("Error in the Network indexing")
message(e)
},
warning = function(w){
message("Warning in Network indexing")
message(w)
},
finally = {
}
)
}
#' DE.ranking
#' @description differential expression ranking using the network gene
#' 1-expression file with NECorr format
#' 2-list of network genes
#' 3-expression factor table generate automatically
#' 4-name of the studied sample
#' read expressiontable from microarrays or RNAseq
#' @param exps expression table
#' @param GeneList gene list present in the network
#' @param factortab table with factors
#' @param sample.l the sample chosen for tissue specificity
#' @param sample.names all the samples in the metadata
#' @param exps.file the expression file
#' @import Biobase
#' @import stats
#' @import limma
#' @return ranks rank of the gene in DEG analysis
#' @export
DE.ranking <- function(exps, GeneList, factortab, sample.l,
sample.names, exps.file = FALSE){
tryCatch(
expr = {
if (exps.file == TRUE){
df <- as.matrix(read.table(exps, header=TRUE,sep="\t",row.names = 1))
expression <- df[,colSums(is.na(df)) != nrow(df)]
targets <- read.table(factortab,header=TRUE,sep="\t",row.names=1)
sample.names <- unique(targets[,1])
f <- factor(targets[,1], levels=sample.names)
}else{
expression <- as.matrix(exps)
f <- factor(factortab)
}
# parse expression data to only have the gene that are
# in the network - load the genes that are in the networks
sel.Genes <- intersect(rownames(expression),GeneList)
no.duplicate.col <- as.character(colnames(expression))
colnames(expression) <- make.unique(no.duplicate.col)
expressio <- expression[sel.Genes, ]
design <- model.matrix(~ 0 + f)
# DE (differential expression)
eset <- ExpressionSet(assayData=expressio)
colnames(design) <- gsub("^f", "", colnames(design)) # Assigns column names.
fit <- lmFit(eset, design)
other.names = sample.names[which(sample.names != sample.l)]
equation <- noquote(paste0(noquote(sample.l), " - ((",
paste(other.names, collapse = '+'), ") /",
length(sample.names)-1,")"))
assign(".equation", equation, envir = .GlobalEnv)
#contrast.matrix <- makeContrasts(equation , levels=design)
contrast.matrix <- makeContrasts(.equation, levels=design)
remove(".equation", envir= .GlobalEnv)
fit2 <- contrasts.fit(fit, contrast.matrix)
# Computes estimated coefficients and standard errors for a given set of contrasts.
fit2 <- suppressWarnings(eBayes(fit2))
#write.table(topTable(fit2, coef=1, adjust="fdr", sort.by="B",
# number=50000), file="limma_complete.xls", row.names=F, sep="\t")
# Exports complete limma statistics table for first comparison group ('coef=1')
# to tab delimited text file.
de <- topTable(fit2, coef=1, adjust.method="fdr", sort.by="P", number=length(sel.Genes))
ranks <- ScalN(de$B)
names(ranks) = rownames(de)
#print(head(ranks))
return(ranks)
},
error = function(e){
message("Error in the Differential Expression ranking")
message(e)
},
warning = function(w){
message("Warning in the Differential Expression ranking")
message(w)
},
finally = {
}
)
}
#' NetTopology
#' @description network topology statistics using igraph methods;
#' script partly adapted from https://gist.github.com/mhawksey/16-2306
#' @author Christophe Liseron-Monfils, Martin Hawksey
#' @param network the network should be a list of interactions space by a line
#' @return datagrid
#' @export
NetTopology <- function(network){
tryCatch(
expr = {
# pass to igraph the network that would transform in a graph object
g <- graph.data.frame(network, directed = T)
## calculate the topology stats necessary for network statistics
betweenness_centrality <- betweenness(g,v=V(g), directed = F, normalized = T)
#betweenness_centrality <- betweenness.estimate(g,vids=V(g), directed = F, normalized = T)
eigenvector_centrality<-evcent(g, scale = TRUE)
# algorithm used by google to find webpages; this is an evolution of eigen-vector
# not sure if applicable to biological data yet
pagerank<-page.rank(g)$vector
# degree centrality
degree<-degree(g, v=V(g), mode = "total",normalized = T)
# in and out degree
degree_in<-degree(g, v=V(g), mode = "in")
degree_out<-degree(g, v=V(g), mode = "out")
# transitivity of gene in the network, also called coefficient cluster
transitivity_centrality <- transitivity( g, vids=V(g),type ="local", isolates= "zero")
# preparation to transfrom the network statistics in a table
gene_name<-V(g)$name
# bind into matrice
datagrid <- data.frame(I(gene_name),degree,degree_in,degree_out,
betweenness_centrality,eigenvector_centrality[[1]],
transitivity_centrality,pagerank)
cc <- c("gene_name", "EdgeCount","degree_in","degree_out","BetweennessCentrality",
"eigenvector_centrality","ClusteringCoefficient","pagerank")
colnames(datagrid) <- cc
rownames(datagrid) <- gene_name
#eliminate the first column being a duplicate of rownames now
datagrid <- datagrid[,-1]
return(datagrid)
},
error = function(e){
message("Error in the network topology parameters")
message(e)
},
warning = function(w){
message("Warning in the network topology parameters")
message(w)
},
finally = {
}
)
}
#' ScalN
#' @description rescaling data in a range between 0 to 1
#' @param x vector that need to be rescaled
#' @param ... other paramter that can be passed on
#' @return res
#' @export
ScalN <- function(x, ...) {
res <- (x - min(x, ...))/(max(x, ...) - min(x, ...))
return(res)
}
#' scaling_param
#' @description
#' rescaling the data from the parameter
#' @param x vector with names of the genes
#' @return res
#' @export
scaling_param <- function(x){
naming <- deparse(substitute(x))
x1 <- range(x[!is.infinite(x)])
x[is.infinite(x) & sign(x) < 0] <- x1[1]
x[is.infinite(x)] <- x1[2]
x <- as.data.frame(x)
x$x <- rescale(x$x, to=c(0,100))
colnames(x) <- naming
return(x)
}
#' fishersMethod
#' @description combine p-values Fisher method to have overall importance of
#' this node in the studied tissue (using code from Michael Love code)
#' (http://mikelove.wordpress.com/2012/03/12/combining-p-values-fishers-method-sum-of-p-values-binomial/)
#' @param x vector of p-values
#' @return res
#' @export
fishersMethod <- function(x) {
res <- pchisq(-2 * sum(log(x)),df=2*length(x),lower.tail=FALSE)
return(res)
}