-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGRch38toGRch37.R
More file actions
executable file
·70 lines (59 loc) · 2.21 KB
/
GRch38toGRch37.R
File metadata and controls
executable file
·70 lines (59 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
## mapping use http://genome.ucsc.edu/cgi-bin/hgLiftOver
### get the mapped variants from Clinvar itself
Clinvar <- read.delim("variant_summary.txt")
gr37 <- Clinvar[Clinvar[,"Assembly"]=="GRCh37",]
genedx <- read.delim("clinvar_result.txt")
remap <- genedx[!(genedx[,"Name"] %in% gr37[,"Name"]),]
qwt(remap,file="toGRch37_482.txt",flag=2)
## try to mapping the rest 5 using liftover
beda <- remap[,c("Chromosome","Location")]
bedb <- sapply(1:dim(beda)[1],function(i) {
if(any(grepl("-",beda[i,2]))){ unlist(strsplit(beda[i,2]," - ")) ;
}else{ rep(beda[i,2],2); }
})
bedb <- t(bedb)
c <- paste("chr",beda[,1],":",bedb[,1],"-",bedb[,2],sep="")
subs <- c(c=="chr:-" | c=="chrChromosome:Location-Location")
c <- c[!subs]
qwt(c,file="GeneDx_5.bed")
### get ref and alt from variant names to run wannovar
genedx37 <- gr37[gr37[,"Name"] %in% genedx[,"Name"], ]
wanno <- genedx37[,c("Chromosome","Start","Stop","ReferenceAllele","AlternateAllele")]
qwt(wanno,file="Genedx37_23609.txt")
anno5 <- read.table("GeneDx_5_gr37.txt")
colnames(anno5) <- colnames(wanno)
wannoa <- rbind(wanno,anno5)
qwt(wannoa,file="Genedx_GR37.txt")
### get ref and alt for 975 P or VLP variants
Pvlp <- read.delim("Table3Sup.txt")
tmp <- Pvlp[,4]
tmp <- gsub("+","",tmp)
tmp <- gsub("c.","",tmp)
tmp <- gsub("_","",tmp)
tmp <- gsub("-","",tmp)
refalt <- sapply(1:length(tmp), function(i){
if(grepl("del",tmp[i])){
c(gsub("del","",gsub("\\d","",tmp[i])),"-")
}else if(grepl("ins",tmp[i])){
c("-",gsub("ins","",gsub("\\d","",tmp[i])))
}else if(grepl("dup",tmp[i])){
onea <- gsub("dup","",gsub("\\d","",tmp[i]))
c(onea,paste(onea,onea,sep=""))
}else{
unlist(strsplit(gsub("\\d","",tmp[i]),">"))
}
})
refalt <- t(refalt)
#spchar <- substr(refalt[1,1],1,1)
#refalt[,1] <- gsub(spchar,"",refalt[,1])
wann <- cbind(Pvlp[,2],Pvlp[,3],Pvlp[,3],refalt)
qwt(wann,file="GeneDx_PVLP.txt")
## fixed version by manual double check
tmp <- read.delim("GeneDx_PVLP_fixed.txt",header=FALSE)
tmp[tmp[,4]=="",4] <- "-"
tmp[tmp[,5]=="",5] <- "-"
qwt(tmp,file="GeneDx_PVLP_fixed_alt.txt")
tmp[tmp=="-"] <- "*"
qwt(tmp,file="GeneDx_PVLP_fixed_alt1.txt")
tmp[tmp=="-"] <- "."
qwt(tmp,file="GeneDx_PVLP_fixed_alt2.txt")