-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprotein_synthesis.py
106 lines (88 loc) · 15.2 KB
/
protein_synthesis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
def transcribe(dna: str) -> str:
    """Return the RNA strand complementary to *dna*.

    Each DNA base is replaced by its pairing RNA base
    (A -> U, T -> A, G -> C, C -> G).

    Args:
        dna: Sequence made up only of the upper-case characters A, T, G, C.

    Returns:
        The transcribed RNA sequence, the same length as *dna*.

    Raises:
        SystemExit: With status 1, after printing
            "Bad character in sequence!", if *dna* contains any other
            character.
    """
    complement = {"A": "U", "T": "A", "G": "C", "C": "G"}
    try:
        # join() builds the result in one pass instead of repeated `+=`.
        return "".join(complement[base] for base in dna)
    except KeyError:
        print("Bad character in sequence!")
        # Raise SystemExit directly rather than calling the interactive-only
        # `exit()` helper, which the `site` module may not have installed.
        raise SystemExit(1) from None
def translate(rna: str) -> str:
    """Translate *rna* into a string of one-letter amino-acid codes.

    Translation begins at the first "AUG" start codon (which itself
    contributes "M") and proceeds in consecutive three-base codons until a
    stop codon (UGA, UAG or UAA) or the end of the strand is reached. A
    trailing group of fewer than three bases is ignored. Each codon is
    converted by ``codon_to_amino_acid``.

    Args:
        rna: RNA sequence to translate.

    Returns:
        The amino-acid sequence, one character per translated codon.

    Raises:
        SystemExit: With status 1, after printing "No start codon!", if
            *rna* contains no "AUG".
    """
    stop_codons = ("UGA", "UAG", "UAA")

    # str.find handles every position uniformly, including a start codon
    # sitting at the very end of the strand and inputs shorter than a codon
    # (the old sliding window mis-reported the former and raised IndexError
    # on the latter).
    start = rna.find("AUG")
    if start == -1:
        print("No start codon!")
        raise SystemExit(1)

    amino_acids = []
    # Stop at len(rna) - 2 so only complete three-base codons are read.
    for position in range(start, len(rna) - 2, 3):
        codon = rna[position:position + 3]
        if codon in stop_codons:
            break
        amino_acids.append(codon_to_amino_acid(codon))
    return "".join(amino_acids)
# RNA codons grouped by the one-letter amino-acid code they encode.
# The three stop codons (UGA, UAG, UAA) are deliberately absent.
_CODONS_BY_AMINO_ACID = {
    "A": ("GCA", "GCC", "GCG", "GCU"),
    "R": ("AGA", "AGG", "CGA", "CGC", "CGG", "CGU"),
    "N": ("AAC", "AAU"),
    "D": ("GAC", "GAU"),
    "C": ("UGC", "UGU"),
    "E": ("GAA", "GAG"),
    "Q": ("CAA", "CAG"),
    "G": ("GGA", "GGC", "GGG", "GGU"),
    "H": ("CAC", "CAU"),
    "I": ("AUA", "AUC", "AUU"),
    "L": ("UUA", "UUG", "CUA", "CUC", "CUG", "CUU"),
    "K": ("AAA", "AAG"),
    "M": ("AUG",),
    "F": ("UUC", "UUU"),
    "P": ("CCA", "CCC", "CCG", "CCU"),
    "S": ("AGU", "AGC", "UCU", "UCC", "UCA", "UCG"),
    "T": ("ACA", "ACC", "ACG", "ACU"),
    "W": ("UGG",),
    "Y": ("UAC", "UAU"),
    "V": ("GUA", "GUC", "GUG", "GUU"),
}

# Flattened codon -> amino-acid lookup, built once at import time.
_AMINO_ACID_BY_CODON = {
    codon: amino_acid
    for amino_acid, codons in _CODONS_BY_AMINO_ACID.items()
    for codon in codons
}


def codon_to_amino_acid(codon: str) -> str:
    """Return the one-letter amino-acid code for an RNA *codon*.

    Args:
        codon: Three upper-case RNA bases (A, C, G, U).

    Returns:
        The single-character amino-acid code.

    Raises:
        SystemExit: With status 1, after printing "Invalid codon!", if
            *codon* does not encode an amino acid. This includes the stop
            codons, which have no entry in the table.
    """
    amino_acid = _AMINO_ACID_BY_CODON.get(codon)
    if amino_acid is None:
        print("Invalid codon!")
        # This is a failure, so exit non-zero (the old code exited with 0,
        # which signalled success to the calling process).
        raise SystemExit(1)
    return amino_acid
if __name__ == "__main__":
    # Self-checks: each print shows True when the computed value matches
    # the expected one.
    # Short fixture: a DNA strand, its expected transcript, and the peptide
    # expected from translating that transcript.
    dna, rna, amino_acids = (
        "GATACCCATAAGCAGGGATGACTGTTG",
        "CUAUGGGUAUUCGUCCCUACUGACAAC",
        "MGIRPY",
    )
    print(rna == transcribe(dna))
    print(amino_acids == translate(rna))
rna = "AUGCCCAUGGGAUUAGUGUGGCACAAACAAGGACCACUAGAAAGGAUAUCUAUAAGAGGAGUAAUAGGAGUUAGGAGCGGGUAUAACGAAACCAUUCGAAGGAAUUGGGUCAUGUUAGUAAGUAAAAGCGCCUUAUUCGUAUCCACAUGCUGCCAAUGUAACCCCCCUUACCUGACUUGUUAUAAGCAGUUGAAGAGUCCAGACGUGACACGUUUUGCGCGCGCUCAUGACAUGGAUCAUUUUAGAGACCACACUCAUAUGGCUGGGAGGACCAACUUGGAACAAACGUUUUGUGCUCAACCCGUACAUCUGACUAUGGACCUAGAGUAUUAUCAGGACCCUCCAGUGGCUUAUGUACUACAGUACAUGGUGCCGAGGCGAAUGCCAGCUCUGUCAGUAAUCACGAAUCCUCCCAACCAAGAAUUGCAUUCACUCUGGUCAUAUCUGCAUUUGUCGGUUAGUCAAAUCUCUCGGGGGCGUCUAACUUGCGUGACGCUGCUACUAGUGAUCUGGGACAACCAUUUAUUACGGUACUCCUAUCUUUGCCAGGCCUUCGAAAUGGUUAUGAGCUCGAACAUCCGCUCCUUGGGAGCUGAGACGUUAAAGGUGUUAACAGAUCCGAAUAGAGAGUGCGCUUCGGUCACCAUCCCACAUCUCACUCAGCAAUCCGCUCUGAGUAUUAGAGCACCCCCCAGGUACCUUCACACUGAAUUACGGCGUUGUAGAACAUCUUUAGCACGGACGACCCUUCGGUUCGUGUCCAGUUGUACCUCUCGAUUUGGUGCUUCGAUUCCGAGCUUUCAUGAACUCUACUGCUCGCUAUACACAGGGCUCUCCUUCGUUCAUAUAUCUCGAUCAAUUAAGAACGCGGUUGAUCAAACAUGCGGCAAACCUAUCGACAAUACUCAGAUUGUGAUAAGCUUCCAGAGCUCUCGUCUCCAUCGAUGCAGUCUCAUAGCUCGUCUACAGGCCGACCGUACUUUCUUCCUUUAUAAGCGAGGGUGGUGCCGGGGCGUGUGCUCUAUAGUUAUAUACUUGACCGGGAUCUGCUACGUCAAACUGUGUUGUUACGCUACGGGCAUUAGCACCGUUAGCGACAGCUAUAAACCAAGGGGUUACGUUCAUCUUCAGGUAUUUCUCUUCCGACACGUGACUGUGUACGUAAUUCGUACAUAUGAGAAUUCAGUUACCGGGGCUUCACGAUAUCAAGCGGACUACAUCAUGGGGCGCGACUUACCUACUCUCGUAGAUAGUUUGCCAGUGGUGUCGUAUAGUUGGAACCAGUACCUGCUUCUAGUCUGUGGACCACGUCGGGGAGACCUCUUAUGGCCCCUAUUAGGCAGGGAUCCAACAGUUAAAUUUAAACCGCGGACCUACCGUCAACGGAGGAGUUUUACAGCUUUCAUGACGCGGAAGUGCGUUUCACUAUGGGCGCAAUUGACAGCUCGCCGUCGGACAAGCCGCAAAUUAAUUCGGGCACUAACUCACGGGUGGAAAAUGCGAAUGUCAAGACACGCAGUGGUCUAUACAGGCAUAGAGGGGAUCCUCGGGUCGAAAACAGAGUUAACGGUCCCAACUCAUCCGCUGUACAUCUUGCUCCCGUGCUCGGGUCCAAGUUGCGUGCUCAUGACUGGGGUCCGACCAUUUUGGUCUUCGUCCCAAGAUAGUCGUUGGUCAACAACCAUCAGGCCGGAGGGUGGUGUACUGAGCGUAAGUUGCAGACAGAUAGCCAAUAGCACCUUCUCGGUGCUCGAGUCAUUAGGUCUCUUUAUAGAAGUUCGGCACGGCCACGGGAAAAUCCCGCUCUACCGUAGCUCCACCUGUAGUAAUUGUUCACAUGUCUGUCAGUCUAAUGAGUGGACAGCGUGGUUCUUGAACUCCCCUGCGGCCGGGCCGAACCAGUGCCAAAUUGUUUAUAAUACGAAAUACUGUAUUGCAGGGUACGCCCCGAGCCCGUUACUAAGCACAGCCCGCGCAGCGAGUUAUCGAUAUA
AAUGCUCGUGGUACUUCCUUCUUCUCUUUUUGACGUGUUCCCUCAUCUCGGGGCAAUUCUUGGAACCGGGGAUUAUGAAGUGUGCACUGGGCAUGCUAAAGGUAGGCUCGGAAUGCAAUCUCGAUGAACCGCACGUUAGUUCCACCGCGGGUUACCACGGCCACGUGAACACCCUCUGUUGCAUUAUUGCAGUUAGCCCGCCGCUAAGCCUGCGGGUUAAAAGACCGGCGGGGAGAGUUGUCUUCGAAAUAGGUCCCGCAUGUGAUCAGGACGGUGGUCAACUUGCGUCAAAUAUAGUGAUAACUCGUAAUCCGACUGGGUGGCACCCAUGGACUUCCCGAGUCAGCGCCCUUACCAUACGCUCACUACAAUGCCCACAGUGUGUGAACGAAGGCUGGCACUGGUCGCACCCUGAGCCAGGACAUAUGGGACGAUACUUGCACCCAAACUCAAUCCCUUACGAAGCCUGCGUCUCUACCUAUACAAUUGCCGAAAAAUUCUCGGUUCAUUUUAGUACCCGGAGCAUUGACUGGGAGUGCCCGGUCCGAAUACUCGCGACGUACACAAGACAGCUGGAGUUGCGUUGCCUUACCGGAACGUGCUUUUCCUACUGGAAUACCCCAAAGAUUCCUACGGUAACACCCCAUGUAGCACGGAAAUCUAAGGAGACUGACCGAAACGCCUCGAUUCAGCCCCCGAUGGCUGCUACGGUACCAGCAACAGGCUUAUCGAAGCAAUGGAUUGUCAUCCAAUCUUCCAGGGAUCUCCGGACUGACGAUGAGAUGGAUACCACCUUUCAGACACAUCGGUAUGGCUUCGGAAUAUCAACAACGGAGUGCUACACUCUACCGAGCCUAUACUCUGCCCCAAAGAGUAGAAAUACCCACACUUGUCACGCAUCGCCUUCCAACGAUAGUUGGUCCUGCUUCAAGCUCUGUGGAGUAAGUAUACGUGAUAGGAAAAGACGAGUCUGGACUGCUUCGCGAUGUAAUGACCUAGCGUCAGAAAAGGGUGGCCGGCAUGGUACACCACCCACAAGAAUCCCACCGUCGUUGCCUAGAAAAACGACACAGACUGGUGCAGCGAACCUAGCUUACUGGUCCCAGUUCAUUAACGGCGACAAAUCGCCGGAUGGCUAUACUAAGCAUGUUGGCACGGACGCCAUAUAUGCAGCGCGUGUUCAAGCGCACAGCCUGACGUGCUCCCGGCUAUCCAGAAUUGUGUGUAGUGGAUCCGCCGUGGGUUGGGAUGGAGACGAUUUUCGUGAGUCGGACGUUGACCGAGAAUGCGAGGUUACGAGUGGCCCAAGGCCCCAGAGAUUUUGUAUUUACAUCAUGUCCUCCGUUUCGACCGACGGUGGAGUAUGUCGAAAGAUCUGGAUAUUCCACCUCUUAUCCAAUUCAUGUAGGAGCUUGUUUUCCCCGUCAUCAGAUAGGGAACAGAAGCUCGGAAUAAUGGCGCAGGCAGAGGUGUUCUGUCACGCCCUCUCAAAAUUUCUGUUUCGGUUGUGUUCUAUUUCAUCGCUCAGGGCGCUCAGUUCUUAUCCCAGAUCGGGGCGAGUUCUCUCUAUUGUGACAAACAUACCGCAGUGGGACAGCCGCCCUAGGCUACGAAUUGACUACCAUCAAUUUACACCAAUUUUUCAUAACCCGCCAGGUAAACUCCUGCAGCCUAACGAUACGUGCCUGGGCUGUUCAAACUGCCUCCAGAGCUCGCCUCUGCUUUUUACCCGGGCAGGUCUUUUGGGCCUAUUCAGAACCAUGGCCCCUAUAACAUACGGCUGCAUGACUCGCUCUGCUUGGUCUUCGUUACAGCGUGGAGGGCCACUCGACGUUAUCCUCUCUACGUCGAGAGCUACCCUAUUCAAGCGGUGUGUAAGAUCACGAUUCAGAAAUCCAUGUGGUAUUAAUGUUCUCACAAACUUUCACGAACCGCGAGCACUUCUAUUUAGCGCUCCACGCGGGAAUCCGAGCUAUACGGUCAUGUUCGCCAGUACGGCA
GCCACGUGCGAGCGCCUGGCGUUAGCACGUCAUCGUACUUAUUCCCGAUUCCUCUUUGAACGUACUAGAAAACAUGCGCUGAAAGGGAGAACAAGUGAUCAAGACGGCCUAAGUGCCGGUGUCACGUGGAAGGGCGUGAAUUCUGGCCGGUACCUUUAUAGCACGUGUCGUCAUGGGCUGCGCAGGUUGAUCCGGACGAUGAUGAAGCAAAUUGGAAGACUAUUUUAUAGAACGAGGUGCACGCUUCUCGUCCACAGAGACCCUGGCGCGGAACACUACCUUAGUACGCCAAUAGUAAAGCAAGGACGCGGCUACCACCGGCGUACACGACUUCCAUUUCUGCUUUUUUACGACCAAUGCCCAGCGCGCGGAGUGCCGCUAUUAGUACGCUUAGUGACCAAUGCUUUAUCGCCUUAUAUUACCAUGUCAUAUGCAUGUCAGACUUGCAACAGAGCCCAGGAGAGCGUCAGCCUAUUACCGAGUACAUGUGCUUACCCCUACCGGUCAAUCAAUUCCGAUCCUGUUGGAGCUUGGAAAGUGGGCUCCCUAAAUACGCACUACAUUUGUAAGUUACCCAAGGUAUCACCGCCGAGAUGGGCAAGCUCACAGCAUAUCGCGAGCACAGUCGUUAGAAGCGAUACCCAUAGUAGAGACGAGUCGAUCAAUGGACAUGGCAGGCUUAAAGUUGCAAUCAUUGUUUGUAGGGUAACCGCAAGAAGUGUAGCAGAAGUCGGAGGUUACUGGAAAAAAAGACACAGGAGUCCACGAAGUAUAUUUGAUAUGAAAAAUAGAGACACUUGGCACGAUGGUGGGCGCGGCCCUACAUCAUCGUCGGACCAUUAUUAUCCCAUAACAGGCAUUCCUGUUCUCAGUACUCCCUCCCGGAGGCCUCAAGCUCGGCUCACCAUUAAAUCGUCAUCAAUUUGGGCUUAUCGAGAUCAACCGAUGCUCACACAGCCAUGCAUUACGUACCCGGUACUGCUCUUGCAAUGGGCCGUAGCUUCGCGAAUGAGUGCAAUAUCUGGCAUUCGGCUUGCGGGUCUGUGGGGAGCAGCUAAUGCACCAGUCCAACUCUUCCUCUACCUCAGUCAGUGCGCACACUUAGGAACUUAUUGUGUAUAUCACGAAAUGGGCCCAAUGUAUAUGGUUCUACCAAACCAACGGCGACUGCGACGCUUUAGUCUUGGAGGACAGAUUCUAUGUUACCCCUUCAGGACGACUUACAGUCGGGCUGUCGUUAGGCCUUAUCCUGAUACGCCGCAUGGUUACAGCGGUAACGGCGUAAACACCGGCUUCCUAAGACGCUGGCCUGGACCUGUCCCGUGUCUGUGGGUUAAGGGUGUUGCCCACCACGCGUCUAAGCUACGGUGGAAGACACUUGCUUGUACUGCGUUAUCCCUGCCUGCCCUACCUAGUCACCCUGGAUUAUUGAAAAGCCUUCUCAAAAGAGGCGAAGUGAAAAAACUGGAGGCAGCUCGCACCGUCUUGCGAGCUUUACAACCGCACCGAACAAAUGGCGGAUAUACUAAGGAACUCGGUCAGGAGGAGGAGCACUGGUUCAGGUACUCAUUCGGAGGUACGAUCGGGGCCUUGUACGAUCGUAAGGUUGCUGUGUGGUACCGCAGCUCACAGGUAAUCUACAAUGGUCCUAUUUAUCGUCUAGUACAGGAGCCGAGUUGCGCCUCUCCGAUCACCUUAUUCAGCCGAGUACUACUUAAUGUUGUAUCUUAUCUAGAGAUAACGGUUAAACGACAUCUGUUCACACCAAACACCAUCCCCGGACGAGGCCUUUCUCAUCGAAAGGGGGCAUUAUUUUUCCUAAUACCACCCUAUCCAAGCUCGUUCGAAUUAAUAAGAAAACCCUACCGCCGCACACAGCCUGCUUUUCCGGGAUCCCUAGGCGAUGAUCCCUUGCUCCCCUGUAUCUGUCGGUCUAAACUGCGACUCAGACGGAUAACUGUGUAUGGUUCAAGCCGGCCAGUUGAAGACGUGGGAACCAC
GGAGUCUAGAUGUCCAAAAACAUUUUACUCUCUCCCGCUCGCGCUGUGUUGGUGCCGGAGUACGGGGGUGGGCAUCUACGGCUAUCUGGAACCGGACUACCCGUAUUAUACGAGGCGCGCCCCCGCCAUACCAAUUUGGAGACACCCUGUAUAUCGCGCUUUCGUACAGUUAACGUUAGCGCAGGCGACGGUACCCGGGCCGAAGCCGUUUUUAUUGCUACCCGUGGGCACCGGACCGUUUAUAAAAAUUUUCGGCCGCCACGGUUUUGCAGUUUUAGCUGCCGUACGAUAUUGUACGCUGUGCUCGGACCCUAUACGGGGACCCGACAGCCGACUUAGCCUAGUAACAGAGAGUGCUCGCCAGGUUCUCACUCCGGUCGAGUAUACCGAUCUGCAACUGUGUUCACUGGGGAGCAUUCAAAAGUCAAAACGGCUCUCAAGACUAUCCCUAACCAAAUUUGAUGAAUUAGCCGCCCGAACAAAUGCUCGGUUCGUGCGUCCCGUAGGAUAUCAACAGGUUAAGCCCCGGGUGAUUGAAACUGAAACUCCUUAUAGGACGUAUCGGCCAAACCCGCCUACAGAUGUAGACGAGGAGCACAAACAUCGUUUACUAGCUGUCAAAAGAUACAUAGGUUUCAGAGGGACACUGUCGGACAGACCCCAGGCGGCGCUCAUUAAAGGUGAAUGCAAGGCCAGCAGGUCGGGGCAUCUUCUUGAAAUUCAUAGAGGAUCAGAGAGACUUGAUUGCCGUAUCACGCUAACGCUACUUAAGGACCCACUUGGCUACUUUUACGGAGUGGACAGUCAGCCACCGGCGGUGCAUUUGAGUUACGACGCGUUCUCGCAACUCAUCAAAAACAGAUGUUUUAGAAAUUACAAGUCGCGAGCUUUCCAAGGCACAAAUGUAGCACGAAAAUCAAUAUGGGAUCGGUCAAGUGACCCUGGUUUGAGGGAGCCAUUGUGUUGGACACUAGACAAGCGCUGGGCCAAGUACACACAUCAACCUCAACCCUGCCAGCCGGUUCCCACCUCCAAUGUAUUUGAAUGCAGGCGUGGCCAAACCGAGGUACGACAUAACGCGUUGGUCUAUAGUCCAACCUUCGCGAAUUUUCGUGUAUCAGCGGACAGUACUGUCUUACUUCCAGCCACCGGGUCAUUAAUCGUUCCAUCUGGAAGGUCCGAGCGUAACCCGGAUUAUCACGCGCCGUGCUCUUUAUGUCCCAACCUCAACUCUGGUUUGCCGGGUAGGAUAGUAGGGACGCCUCGAACCGACAAGCGGCGGGCCGUCACGCAACUCAGGGUGUCCGAAGCAAGUUGCCCUCAGCGGAAUAGAUCGCCUGACUCGUGCGCACUAGGUAAAUGCCCGCUUCCGUUAGAUAACUCUACAAGCCGAUUUUGCGGCCCUUGCGCACAUUAUUCAGACCUAACCACAUUUUUACGGCUCUUAUUCUCACAGACUUCGCCCGAUGGCAGGUUUUUCCAUCAGAUAAUAGCGGUACUCAAAGUGGGUAAAUGCAUUAGAGCUACUGCAGCAUUGAACCCCGCCUCGUACCUUGCCAGCCGUGCUGCACAGUCUGGAAAUAUUGUGGAAGGUCAUCGGGGACCGAUUUUAAGCAGGAGCAUGCUAGUAAAGCGAAACUGGCGUUGUCACGGGUCCUUACCAUGCAAUAGGACGACGGGAGAAACACUUACGAUAUUAGGCAUAAUGUACGGAUCCGUGGGUGGCUCCAGAAUAUUACAAGCAGUAGUGCAUUCGGUGGCUAGACGCGCCCCGCAGGCGACCGAUGGUUUUGGGCAUUUGCUGACCAAACCGUUCGUCAGAGCCAGAACAUCAGAAGAGGAUAAAGGGUGGGUGGUGACUUCUAUAUUCCCUCGACGGAACGCCUAUGACCCACUCGGUAAAGUGGGCCCAACAUGGCGUACGCCCGGCUAUCGCAUGAUACCUCAAGCAGAUAAGUGGGGGAUCGGACCGACUAACGGUGAUACAGCCAACUGCGUAG
GCGGGUGCCUCAGGGUAAUUUGCCCGUCCAGGGCAUCGAAGCAUCACGCAGCAAUAUGCUUAUCCGGCACAAGCUUCGCACGUGUGGGAAAAGGAAGAGAGGAAAAAGUCCGUCAUACGGAGUCCCAAGCGUGGACACGCUUUAAUCUGGGUAAUGCGAGGCGAGGUGGGGUGGACCAGAUUUUACGGAAAUUAACCAUCGAUAGGGGUGGGCUACGUGGGCACGCGAGGAUAAGUACGAAAUACCGGCCCCGCCUACAGAGGAAUGCCACAAUGGCCGGGAAGGAUACCCGACAUUAUUUGAGUGCUUUCAAGUUCGAACUUGCCACGCGAGCGUACAGAUCGGCGCUAAUUCUCUGGGAACUUAUCCAGAGGAGUACUAGAACGCUGGCUUAUUCUACGGGGCGGAUUGAACUCCCAACAACAGUGCCCCACUCUUCGCCCGGUGUUGUGCUAUUAGGAGUAGUGAAUUUAAGAGGAGAAGCCACACAGCAUGCCCUAUUCUCCUAUCUGGAUUCUACUACGCCCUAUGAGUGCUUAGACAUAGCCCCCAAUAGCUGGUUCACGUCCGAGUCAUCCUGGCCACGUAGCGAUGCAAUACUAUCAUACCGGUUGGUUAAUGACCCGGCUGAACUUCUUAGUGCAGGGCCAUCAACGCUAUCACACCCGCUCUGCGGAUCCAUCGGCCGCUCCUUAACACGAGGAGACAGAUUAACGAAGGACUACUACGUAGUACACAAUUUGAGCUCCUCGCUUACGCGAUUGGACCGUCAUAGUUCAGCGACGCGCAGCUUGCGACCAUGCCUGUGGGUCACAACAGAAAAUACAAUUUAUCUUCAGAGGACCUGUUUCUAUUCUGAUUGGGAUUGUAUGCGGUGCCUUUCAGACAAGGGAGGCUACGAUGGCAAUUAUUCGAGGGUAACCGCUUAUCACUAUUGUCGGCCAAAAGUUCACGAUGAGGCUACAGUGUUAUACAAACUUAAGGGUACCCCCAUACGAUUUGGUCCGGCACACAACCACAGAGGUGCCUUAGCUCUUCGUCCUGUUGAAACCACCCCCCUGAGGACAUCUCCUGAAUAUGUGGAACACCCUUACGACGAGGAGUCUAAUACAGGUCGGGGGGCAGCUCGGCAGGAAGGGCUCGAAGUUCAGCUCAUUGCAGGCCAGUACGACGCUUGGCAAUGUAUGGAUAACUGGACGGUUCACAUCAGUCGACUAUGUACCGGGGUACAGUCCCCACCGUUGGCUCGUAGAGUGUCCUAUUUAGGAAUCAGUCCGGACAUUUGGGGCCAUAGUUUUAGUUUCCGCUACACUUGGCCCUUCAAAAGCUCGACCGGGGCGAUUGAUUGGCGCCACGGGUUGUACUCCUUCCUGUCUACCUUAAUAAUCGGUUGCUGCUCACGAGCUCUGACUGAGAGAUCACGUCGAGGACCAGCCCGUUAA"
amino_acids = "MPMGLVWHKQGPLERISIRGVIGVRSGYNETIRRNWVMLVSKSALFVSTCCQCNPPYLTCYKQLKSPDVTRFARAHDMDHFRDHTHMAGRTNLEQTFCAQPVHLTMDLEYYQDPPVAYVLQYMVPRRMPALSVITNPPNQELHSLWSYLHLSVSQISRGRLTCVTLLLVIWDNHLLRYSYLCQAFEMVMSSNIRSLGAETLKVLTDPNRECASVTIPHLTQQSALSIRAPPRYLHTELRRCRTSLARTTLRFVSSCTSRFGASIPSFHELYCSLYTGLSFVHISRSIKNAVDQTCGKPIDNTQIVISFQSSRLHRCSLIARLQADRTFFLYKRGWCRGVCSIVIYLTGICYVKLCCYATGISTVSDSYKPRGYVHLQVFLFRHVTVYVIRTYENSVTGASRYQADYIMGRDLPTLVDSLPVVSYSWNQYLLLVCGPRRGDLLWPLLGRDPTVKFKPRTYRQRRSFTAFMTRKCVSLWAQLTARRRTSRKLIRALTHGWKMRMSRHAVVYTGIEGILGSKTELTVPTHPLYILLPCSGPSCVLMTGVRPFWSSSQDSRWSTTIRPEGGVLSVSCRQIANSTFSVLESLGLFIEVRHGHGKIPLYRSSTCSNCSHVCQSNEWTAWFLNSPAAGPNQCQIVYNTKYCIAGYAPSPLLSTARAASYRYKCSWYFLLLFLTCSLISGQFLEPGIMKCALGMLKVGSECNLDEPHVSSTAGYHGHVNTLCCIIAVSPPLSLRVKRPAGRVVFEIGPACDQDGGQLASNIVITRNPTGWHPWTSRVSALTIRSLQCPQCVNEGWHWSHPEPGHMGRYLHPNSIPYEACVSTYTIAEKFSVHFSTRSIDWECPVRILATYTRQLELRCLTGTCFSYWNTPKIPTVTPHVARKSKETDRNASIQPPMAATVPATGLSKQWIVIQSSRDLRTDDEMDTTFQTHRYGFGISTTECYTLPSLYSAPKSRNTHTCHASPSNDSWSCFKLCGVSIRDRKRRVWTASRCNDLASEKGGRHGTPPTRIPPSLPRKTTQTGAANLAYWSQFINGDKSPDGYTKHVGTDAIYAARVQAHSLTCSRLSRIVCSGSAVGWDGDDFRESDVDRECEVTSGPRPQRFCIYIMSSVSTDGGVCRKIWIFHLLSNSCRSLFSPSSDREQKLGIMAQAEVFCHALSKFLFRLCSISSLRALSSYPRSGRVLSIVTNIPQWDSRPRLRIDYHQFTPIFHNPPGKLLQPNDTCLGCSNCLQSSPLLFTRAGLLGLFRTMAPITYGCMTRSAWSSLQRGGPLDVILSTSRATLFKRCVRSRFRNPCGINVLTNFHEPRALLFSAPRGNPSYTVMFASTAATCERLALARHRTYSRFLFERTRKHALKGRTSDQDGLSAGVTWKGVNSGRYLYSTCRHGLRRLIRTMMKQIGRLFYRTRCTLLVHRDPGAEHYLSTPIVKQGRGYHRRTRLPFLLFYDQCPARGVPLLVRLVTNALSPYITMSYACQTCNRAQESVSLLPSTCAYPYRSINSDPVGAWKVGSLNTHYICKLPKVSPPRWASSQHIASTVVRSDTHSRDESINGHGRLKVAIIVCRVTARSVAEVGGYWKKRHRSPRSIFDMKNRDTWHDGGRGPTSSSDHYYPITGIPVLSTPSRRPQARLTIKSSSIWAYRDQPMLTQPCITYPVLLLQWAVASRMSAISGIRLAGLWGAANAPVQLFLYLSQCAHLGTYCVYHEMGPMYMVLPNQRRLRRFSLGGQILCYPFRTTYSRAVVRPYPDTPHGYSGNGVNTGFLRRWPGPVPCLWVKGVAHHASKLRWKTLACTALSLPALPSHPGLLKSLLKRGEVKKLEAARTVLRALQPHRTNGGYTKELGQEEEHWFRYSFGGTIGALYDRKVAVWYRSSQVIYNGPIYRLVQEPSCASPITLFSRVLLNVVSYLEITVKRHLFTPNTIPGRGLSHRKGALFFLIPPYPSSFELIRKPYRRTQPAFPGSLGDDPLLPCICRSKLRLRRITV
YGSSRPVEDVGTTESRCPKTFYSLPLALCWCRSTGVGIYGYLEPDYPYYTRRAPAIPIWRHPVYRAFVQLTLAQATVPGPKPFLLLPVGTGPFIKIFGRHGFAVLAAVRYCTLCSDPIRGPDSRLSLVTESARQVLTPVEYTDLQLCSLGSIQKSKRLSRLSLTKFDELAARTNARFVRPVGYQQVKPRVIETETPYRTYRPNPPTDVDEEHKHRLLAVKRYIGFRGTLSDRPQAALIKGECKASRSGHLLEIHRGSERLDCRITLTLLKDPLGYFYGVDSQPPAVHLSYDAFSQLIKNRCFRNYKSRAFQGTNVARKSIWDRSSDPGLREPLCWTLDKRWAKYTHQPQPCQPVPTSNVFECRRGQTEVRHNALVYSPTFANFRVSADSTVLLPATGSLIVPSGRSERNPDYHAPCSLCPNLNSGLPGRIVGTPRTDKRRAVTQLRVSEASCPQRNRSPDSCALGKCPLPLDNSTSRFCGPCAHYSDLTTFLRLLFSQTSPDGRFFHQIIAVLKVGKCIRATAALNPASYLASRAAQSGNIVEGHRGPILSRSMLVKRNWRCHGSLPCNRTTGETLTILGIMYGSVGGSRILQAVVHSVARRAPQATDGFGHLLTKPFVRARTSEEDKGWVVTSIFPRRNAYDPLGKVGPTWRTPGYRMIPQADKWGIGPTNGDTANCVGGCLRVICPSRASKHHAAICLSGTSFARVGKGREEKVRHTESQAWTRFNLGNARRGGVDQILRKLTIDRGGLRGHARISTKYRPRLQRNATMAGKDTRHYLSAFKFELATRAYRSALILWELIQRSTRTLAYSTGRIELPTTVPHSSPGVVLLGVVNLRGEATQHALFSYLDSTTPYECLDIAPNSWFTSESSWPRSDAILSYRLVNDPAELLSAGPSTLSHPLCGSIGRSLTRGDRLTKDYYVVHNLSSSLTRLDRHSSATRSLRPCLWVTTENTIYLQRTCFYSDWDCMRCLSDKGGYDGNYSRVTAYHYCRPKVHDEATVLYKLKGTPIRFGPAHNHRGALALRPVETTPLRTSPEYVEHPYDEESNTGRGAARQEGLEVQLIAGQYDAWQCMDNWTVHISRLCTGVQSPPLARRVSYLGISPDIWGHSFSFRYTWPFKSSTGAIDWRHGLYSFLSTLIIGCCSRALTERSRRGPAR"
# Second self-check: prints True when translating the long RNA fixture
# assigned to `rna` yields the expected `amino_acids` string.
print(translate(rna) == amino_acids)