-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathReplicate.py
More file actions
131 lines (110 loc) · 4.95 KB
/
Replicate.py
File metadata and controls
131 lines (110 loc) · 4.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
ATCAATGATCAACGTAAGCTTCTAAGCATGATCAAGGTGCTCACACAGTTTATCCACAACCTGAGTGGATGACATCAAGATAGGTCGTTGTATCTCCTTCCTCTCGTACTCTCATGACCACGGAAAGATGATCAAGAGAGGATGATTTCTTGGCCATATCGCAATGAATACTTGTGACTTGTGCTTCCAATTGACATCTTCAGCGCCATATTGCGCTGGCCAAGGTGACGGAGCGGGATTACGAAAGCATGATCATGGCTGTTGTTCTGTTTATCTTGTTTTGACTGAGACTTGTTAGGATAGACGGTTTTTCATCACTGACTAGCCAAAGCCTTACTCTGCCTGACATCGACCGTAAATTGATAATGAATTTACATGCTTCCGCGACGATTTACCTCTTGATCATCGATCCGATTGAAGATCTTCAATTGTTAATTCTCTTGCCTCGACTCATAGCCATGATGAGCTCTTGATCATGTTTCCTTAACCCTCTATTTTTTACGGAAGAATGATCAAGCTGCTGCTCTTGATCATCGTTTC
aactctatacctcctttttgtcgaatttgtgtgatttatagagaaaatcttattaactgaaactaaaatggtaggtttggtggtaggttttgtgtacattttgtagtatctgatttttaattacataccgtatattgtattaaattgacgaacaattgcatggaattgaatatatgcaaaacaaacctaccaccaaactctgtattgaccattttaggacaacttcagggtggtaggtttctgaagctctcatcaatagactattttagtctttacaaacaatattaccgttcagattcaagattctacaacgctgttttaatgggcgttgcagaaaacttaccacctaaaatccagtatccaagccgatttcagagaaacctaccacttacctaccacttacctaccacccgggtggtaagttgcagacattattaaaaacctcatcagaagcttgttcaaaaatttcaatactcgaaacctaccacctgcgtcccctattatttactactactaataatagcagtataattgatctga
# Copy your updated FrequentWords function (along with all required subroutines) below this line
def PatternCount(Text,Pattern):
count = 0
for i in range(len(Text)-len(Pattern)+1):
if Text[i:i+len(Pattern)] == Pattern:
count = count+1
return count
def FrequentWords(Text, k):
words = []
freq = FrequencyMap(Text, k)
m = max(freq.values())
for key in freq:
if m==freq[key]:
words.append(key)
return words
def FrequencyMap(Text, k):
freq = {}
n = len(Text)
for i in range(n-k+1):
Pattern = Text[i:i+k]
freq[Pattern] = PatternCount(Text,Pattern)
return freq
# Now set Text equal to the Vibrio cholerae oriC and k equal to 10
Text = """TATCGCAATGAATACTTGTGACTTGTGCTTCCAATTGACATCTTCAGCGCCATATTGCGCTGGCCAAGGTGACGGAGCGGGATTACGAAAGCATGATCATGGCTGTTGTTCTGTTTATCTTGTTTTGACTGAGACTTGTTAGGATAGACGGTTTTTCATCACTGACTAGCCAAAGCCTTACTCTGCCTGACATCGACCGTAAATTGATAATGAATTTACATGCTTCCGCGACGATTTACCTCTTGATCATCGATCCGATTGAAGATCTTCAATTGTTAATTCTCTTGCCTCGACTCATAGCCATGATGAGCTCTTGATCATGTTTCCTTAACCCTCTATTTTTTACGGAAGAATGATCAAGCTGCTGCTCTTGATCATCGTTTC"""
k = 10
print(FrequentWords(Text,k))
def Reverse(Pattern):
reversed = ""
for char in Pattern:
reversed = char + reversed
return reversed
def Complement(Pattern):
complement = ""
for char in Pattern:
if char == "A":
complement+="T"
elif char == "T":
complement+="A"
elif char == "C":
complement+="G"
elif char == "G":
complement+="C"
return complement
def PatternMatching(Pattern, Genome):
positions = [] # output variable
for i in range(len(Genome)-len(Pattern)+1):
if Genome[i:i+len(Pattern)] == Pattern:
positions.append(i)
return positions
def SymbolArray(Genome, symbol):
array = {}
n = len(Genome)
ExtendedGenome = Genome + Genome[0:n//2]
for i in range(n):
array[i] = PatternCount(ExtendedGenome[i:i+(n//2)], symbol)
return array
def FasterSymbolArray(Genome, symbol):
array = {}
n = len(Genome)
ExtendedGenome = Genome + Genome[0:n//2]
# look at the first half of Genome to compute first array value
array[0] = PatternCount(symbol, Genome[0:n//2])
for i in range(1, n):
# start by setting the current array value equal to the previous array value
array[i] = array[i-1]
# the current array value can differ from the previous array value by at most 1
if ExtendedGenome[i-1] == symbol:
array[i] = array[i]-1
if ExtendedGenome[i+(n//2)-1] == symbol:
array[i] = array[i]+1
return array
def MinimumSkew(Genome):
positions = [] # output variable
skew = SkewArray(Genome)
x = min(skew)
for i in range(len(skew)):
if skew[i]==x:
positions.append(i)
return positions
def SkewArray(Genome):
skew = [] # output variable
skew.append(0)
for i in range(len(Genome)):
if Genome[i]=="G":
skew.append(skew[i]+1)
elif Genome[i]=="C":
skew.append(skew[i]-1)
else:
skew.append(skew[i])
return skew
def HammingDistance(p, q):
distance = 0
for i in range(len(p)):
if p[i]!=q[i]:
distance+=1
return distance
def ApproximatePatternMatching(Text, Pattern, d):
positions = [] # initializing list of positions
for i in range(len(Text)-len(Pattern)+1):
if HammingDistance(Pattern, Text[i:i+len(Pattern)]) <= d:
positions.append(i)
return positions
def ApproximatePatternCount(Pattern, Text, d):
count = 0 # initialize count variable
for i in range(len(Text)-len(Pattern)+1):
if HammingDistance(Pattern, Text[i:i+len(Pattern)]) <= d:
count += 1
return count