-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathD.similis_orthologs.r
55 lines (42 loc) · 1.65 KB
/
D.similis_orthologs.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Load required packages
library(tidyverse)
library(ggplot2)
library(zoo) # for rolling means
#Load data
df <- read.delim("C:/Users/ericd/Desktop/combBed.txt")
# Remove scaffolded genome copies from the set(retain contig-level originals)
exclude_list <- c("XINB_scaffold","T1_scaffold","CH434_scaffold","CH14_scaffold")
df <- df[!(df$genome %in% exclude_list), ]
# Create a binary indicator for presence of gene in each genome
df <- df %>%
mutate(presence = 1) # add a column with all 1s indicating presence
# Transform the data to wide format
wide_df <- df %>%
select(og, genome, presence) %>%
distinct() %>%
pivot_wider(names_from = genome, values_from = presence, values_fill = 0)
# Count the number of genomes each gene was found in
wide_df$totals <- rowSums(wide_df[,-1])
################################################################################
#Count the number of orthologs in HDH contigs in D. similis
counts <- df %>%
filter(genome == "D_similis_IL_SIM_A20",chr == 4 | chr == 93) %>%
left_join(wide_df, by = "og")
res <- wide_df[ wide_df$og %in% counts$og,]
totals <- colSums(res[,-1])
results <- data.frame(total = totals)
################################################################################
#Count the number of orthologs in non-HDH contigs in D. similis
counts <- df %>%
filter(genome == "D_similis_IL_SIM_A20",chr == 134 |
chr == 98 |
chr == 482 |
chr == 286 |
chr == 158 |
chr == 11 |
chr == 81
) %>%
left_join(wide_df, by = "og")
res <- wide_df[ wide_df$og %in% counts$og,]
totals <- colSums(res[,-1])
results2 <- data.frame(total = totals)