From 795f31a7a05bfff64c352acae10a5ca52cd1fd54 Mon Sep 17 00:00:00 2001
From: nikita <zherko.na@phystech.edu>
Date: Tue, 26 Sep 2023 22:37:23 +0300
Subject: [PATCH 01/25] Add translation, mutations and level_of_hydrophobic

---
 protein_tools.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 protein_tools.py

diff --git a/protein_tools.py b/protein_tools.py
new file mode 100644
index 0000000..0ef6f82
--- /dev/null
+++ b/protein_tools.py
@@ -0,0 +1,60 @@
+def level_of_hydrophobic(protein):
+    gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
+
+    count_of_gydrophobic = 0
+    if is_protein(protein):
+        for i in range(len(protein)):
+            if protein[i] in gydrophobic_aminoacids:
+                count_of_gydrophobic += 1
+
+    percentage = count_of_gydrophobic / len(protein) * 100
+
+    return f"Percentage of gydrophobic aminoacids in {protein} = {percentage}%."
+
+
+def translation(seq):
+    """
+    """
+    gene_code = {
+        "F": ["UUC", "UUU"], "L": ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"],
+        "I": ["AUU", "AUC", "AUA"], "M": ["AUG"], "V": ["GUU", "GUC", "GUA", "GUG"],
+        "S": ["UCU", "UCC", "UCA", "UCG"], "P": ["CCU", "CCC", "CCA", "CCG"],
+        "T": ["ACU", "ACC", "ACA", "ACG"], "A": ["GCU", "GCC", "GCA", "GCG"],
+        "Y": ["UAC", "UAU"], "*": ["UAA", "UAG", "UGA"], "H": ["CAU", "CAC"],
+        "Q": ["CAA", "CAG"], "N": ["AAU", "AAC"],
+        "K": ["AAA", "AAG"], "D": ["GAU", "GAC"], "E": ["GAA", "GAG"],
+        "C": ["UGU", "UGC"], "W": ["UGG"], "R": ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"],
+        "S": ["AGU", "AGC"], "G": ["GGU", "GGC", "GGA", "GGG"]
+    }
+    triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
+    protein = []
+    for triplet in triplets:
+        for aminoacid in gene_code.keys():
+            if triplet in gene_code[aminoacid]:
+                protein.append(aminoacid)
+
+    if is_protein("".join(protein)):
+        start = protein.index("M")
+        stop = protein.index("*")
+        return "".join(protein[start:stop + 1])
+    else:
+        return "This sequence doesn't include the gene."
+
+
+def mutations(seq, protein):
+    correct_protein = translation(seq)
+
+    if is_protein(protein):
+        bank_of_mutations = []
+        for i in range(len(correct_protein)):
+            if correct_protein[i] != protein[i]:
+                bank_of_mutations.append(f'{protein[i]}{i + 1}')
+
+        if len(bank_of_mutations) == 0:
+            return "Protein without mutations."
+        else:
+            return "Mutations:" + ", ".join(bank_of_mutations) + "."
+    else:
+        return "It isn't a protein."
+
+

From 490b985c5749ea2806a5ef50b68127d3d51546cd Mon Sep 17 00:00:00 2001
From: Artyom <toropov.01@bk.ru>
Date: Wed, 27 Sep 2023 01:24:24 +0300
Subject: [PATCH 02/25] Add directory HW4_toropov

---
 HW4_Toropov/protein_tools.py | 54 ++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 HW4_Toropov/protein_tools.py

diff --git a/HW4_Toropov/protein_tools.py b/HW4_Toropov/protein_tools.py
new file mode 100644
index 0000000..87bff7b
--- /dev/null
+++ b/HW4_Toropov/protein_tools.py
@@ -0,0 +1,54 @@
+alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
+amino_acid_masses = {
+    'A': 71.03711,
+    'R': 156.10111,
+    'N': 114.04293,
+    'D': 115.02694,
+    'C': 103.00919,
+    'Q': 128.05858,
+    'E': 129.04259,
+    'G': 57.02146,
+    'H': 137.05891,
+    'I': 113.08406,
+    'L': 113.08406,
+    'K': 128.09496,
+    'M': 131.04049,
+    'F': 147.06841,
+    'P': 97.05276,
+    'S': 87.03203,
+    'T': 101.04768,
+    'W': 186.07931,
+    'Y': 163.06333,
+    'V': 99.06841
+}
+
+
+def is_protein(seq):
+    unique_chars = set(seq)
+    return unique_chars <= alphabet_protein
+
+
+def molecular_weight(seq):
+    molecular_weight = 0
+    for amino_acid in seq:
+        molecular_weight += amino_acid_masses[amino_acid]
+    return round(molecular_weight, 3)
+
+
+def run_protein_tools(*seqs_and_procedure):
+    procedure = seqs_and_procedure[-1]
+    seqs = seqs_and_procedure[:-1]
+
+    results = []
+
+    for seq in seqs:
+        seq = seq.upper()
+        if is_protein(seq) is not True:
+            raise ValueError("Invalid alphabet")
+        if procedure == 'molecular_weight':
+            results.append(molecular_weight(seq))
+
+    if len(results) == 1:
+        return results[0]
+    else:
+        return results

From 4fb8e575a66f468668301e8b4aa287dce8c16fa9 Mon Sep 17 00:00:00 2001
From: Artyom <toropov.01@bk.ru>
Date: Wed, 27 Sep 2023 00:54:10 +0300
Subject: [PATCH 03/25] Add protein_tools.py

With functions is_protein, run_protein_tools, molecular_weight
---
 protein_tools.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 protein_tools.py

diff --git a/protein_tools.py b/protein_tools.py
new file mode 100644
index 0000000..87bff7b
--- /dev/null
+++ b/protein_tools.py
@@ -0,0 +1,54 @@
+alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
+amino_acid_masses = {
+    'A': 71.03711,
+    'R': 156.10111,
+    'N': 114.04293,
+    'D': 115.02694,
+    'C': 103.00919,
+    'Q': 128.05858,
+    'E': 129.04259,
+    'G': 57.02146,
+    'H': 137.05891,
+    'I': 113.08406,
+    'L': 113.08406,
+    'K': 128.09496,
+    'M': 131.04049,
+    'F': 147.06841,
+    'P': 97.05276,
+    'S': 87.03203,
+    'T': 101.04768,
+    'W': 186.07931,
+    'Y': 163.06333,
+    'V': 99.06841
+}
+
+
+def is_protein(seq):
+    unique_chars = set(seq)
+    return unique_chars <= alphabet_protein
+
+
+def molecular_weight(seq):
+    molecular_weight = 0
+    for amino_acid in seq:
+        molecular_weight += amino_acid_masses[amino_acid]
+    return round(molecular_weight, 3)
+
+
+def run_protein_tools(*seqs_and_procedure):
+    procedure = seqs_and_procedure[-1]
+    seqs = seqs_and_procedure[:-1]
+
+    results = []
+
+    for seq in seqs:
+        seq = seq.upper()
+        if is_protein(seq) is not True:
+            raise ValueError("Invalid alphabet")
+        if procedure == 'molecular_weight':
+            results.append(molecular_weight(seq))
+
+    if len(results) == 1:
+        return results[0]
+    else:
+        return results

From 058419501ebf7ed1c21cdaba9101a01324129c62 Mon Sep 17 00:00:00 2001
From: sofiyaga <sofiyaga@icloud.com>
Date: Tue, 26 Sep 2023 22:48:16 +0300
Subject: [PATCH 04/25] Add function calculate_length

---
 protein_tools.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/protein_tools.py b/protein_tools.py
index 87bff7b..08206f4 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -27,6 +27,19 @@ def is_protein(seq):
     unique_chars = set(seq)
     return unique_chars <= alphabet_protein
 
+def compute_length(*seqs: str):
+    """
+    Compute the length of the input amino acid sequence
+    
+    """
+    lens = []
+    for seq in seqs:
+        if is_protein(seq):
+            lens.append(len(seq))
+        else:
+            raise ValueError('Not a protein')
+    return lens if len(lens) > 1 else lens[0]    
+
 
 def molecular_weight(seq):
     molecular_weight = 0

From 8ecd46d9c61e6fa3838cd9485c44e9f4426a2f2a Mon Sep 17 00:00:00 2001
From: sofiyaga <sofiyaga@icloud.com>
Date: Tue, 26 Sep 2023 22:53:23 +0300
Subject: [PATCH 05/25] Add dictionary codon_table

---
 protein_tools.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/protein_tools.py b/protein_tools.py
index 08206f4..f2a2a93 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -22,6 +22,28 @@
     'V': 99.06841
 }
 
+codon_table = {
+        'A': ['GCT', 'GCC', 'GCA', 'GCG'],
+        'C': ['TGT', 'TGC'],
+        'D': ['GAT', 'GAC'],
+        'E': ['GAA', 'GAG'],
+        'F': ['TTT', 'TTC'],
+        'G': ['GGT', 'GGC', 'GGA', 'GGG'],
+        'H': ['CAT', 'CAC'],
+        'I': ['ATT', 'ATC', 'ATA'],
+        'K': ['AAA', 'AAG'],
+        'L': ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
+        'M': ['ATG'],
+        'N': ['AAT', 'AAC'],
+        'P': ['CCT', 'CCC', 'CCA', 'CCG'],
+        'Q': ['CAA', 'CAG'],
+        'R': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
+        'S': ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'],
+        'T': ['ACT', 'ACC', 'ACA', 'ACG'],
+        'V': ['GTT', 'GTC', 'GTA', 'GTG'],
+        'W': ['TGG'],
+        'Y': ['TAT', 'TAC']}
+
 
 def is_protein(seq):
     unique_chars = set(seq)

From 1746a23be47ad4317d761bf2975a7348862c8c4b Mon Sep 17 00:00:00 2001
From: Artyom <toropov.01@bk.ru>
Date: Wed, 27 Sep 2023 16:58:02 +0300
Subject: [PATCH 06/25] Create global variables with alphabets

---
 protein_tools.py | 93 +++++++++++++++++++++---------------------------
 1 file changed, 41 insertions(+), 52 deletions(-)

diff --git a/protein_tools.py b/protein_tools.py
index e0fb41b..e55f70f 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -1,3 +1,30 @@
+alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
+
+amino_acid_masses = {
+    'A': 71.03711,
+    'R': 156.10111,
+    'N': 114.04293,
+    'D': 115.02694,
+    'C': 103.00919,
+    'Q': 128.05858,
+    'E': 129.04259,
+    'G': 57.02146,
+    'H': 137.05891,
+    'I': 113.08406,
+    'L': 113.08406,
+    'K': 128.09496,
+    'M': 131.04049,
+    'F': 147.06841,
+    'P': 97.05276,
+    'S': 87.03203,
+    'T': 101.04768,
+    'W': 186.07931,
+    'Y': 163.06333,
+    'V': 99.06841
+}
+
+gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
+
 codon_table = {
         'A': ['GCT', 'GCC', 'GCA', 'GCG'],
         'C': ['TGT', 'TGC'],
@@ -21,6 +48,17 @@
         'Y': ['TAT', 'TAC']}
 
 
+def is_protein(seq):
+    unique_chars = set(seq)
+    return unique_chars <= alphabet_protein
+
+
+def molecular_weight(seq):
+    molecular_weight = 0
+    for amino_acid in seq:
+        molecular_weight += amino_acid_masses[amino_acid]
+    return round(molecular_weight, 3)
+
 
 def compute_length(*seqs: str):
     """
@@ -37,7 +75,6 @@ def compute_length(*seqs: str):
 
 
 def level_of_hydrophobic(protein):
-    gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
 
     count_of_gydrophobic = 0
     if is_protein(protein):
@@ -53,22 +90,11 @@ def level_of_hydrophobic(protein):
 def translation(seq):
     """
     """
-    gene_code = {
-        "F": ["UUC", "UUU"], "L": ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"],
-        "I": ["AUU", "AUC", "AUA"], "M": ["AUG"], "V": ["GUU", "GUC", "GUA", "GUG"],
-        "S": ["UCU", "UCC", "UCA", "UCG"], "P": ["CCU", "CCC", "CCA", "CCG"],
-        "T": ["ACU", "ACC", "ACA", "ACG"], "A": ["GCU", "GCC", "GCA", "GCG"],
-        "Y": ["UAC", "UAU"], "*": ["UAA", "UAG", "UGA"], "H": ["CAU", "CAC"],
-        "Q": ["CAA", "CAG"], "N": ["AAU", "AAC"],
-        "K": ["AAA", "AAG"], "D": ["GAU", "GAC"], "E": ["GAA", "GAG"],
-        "C": ["UGU", "UGC"], "W": ["UGG"], "R": ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"],
-        "S": ["AGU", "AGC"], "G": ["GGU", "GGC", "GGA", "GGG"]
-    }
-    triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
+      triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
     protein = []
     for triplet in triplets:
-        for aminoacid in gene_code.keys():
-            if triplet in gene_code[aminoacid]:
+        for aminoacid in codon_table.keys():
+            if triplet in codon_table[aminoacid]:
                 protein.append(aminoacid)
 
     if is_protein("".join(protein)):
@@ -96,43 +122,6 @@ def mutations(seq, protein):
         return "It isn't a protein."
 
 
-alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
-amino_acid_masses = {
-    'A': 71.03711,
-    'R': 156.10111,
-    'N': 114.04293,
-    'D': 115.02694,
-    'C': 103.00919,
-    'Q': 128.05858,
-    'E': 129.04259,
-    'G': 57.02146,
-    'H': 137.05891,
-    'I': 113.08406,
-    'L': 113.08406,
-    'K': 128.09496,
-    'M': 131.04049,
-    'F': 147.06841,
-    'P': 97.05276,
-    'S': 87.03203,
-    'T': 101.04768,
-    'W': 186.07931,
-    'Y': 163.06333,
-    'V': 99.06841
-}
-
-
-def is_protein(seq):
-    unique_chars = set(seq)
-    return unique_chars <= alphabet_protein
-
-
-def molecular_weight(seq):
-    molecular_weight = 0
-    for amino_acid in seq:
-        molecular_weight += amino_acid_masses[amino_acid]
-    return round(molecular_weight, 3)
-
-
 def run_protein_tools(*seqs_and_procedure):
     procedure = seqs_and_procedure[-1]
     seqs = seqs_and_procedure[:-1]

From e48afa0c8da143938a7d856fbe525e09908891e4 Mon Sep 17 00:00:00 2001
From: Artyom <toropov.01@bk.ru>
Date: Wed, 27 Sep 2023 17:21:55 +0300
Subject: [PATCH 07/25] Remove is_protein from all functions except run_protein
 tools

---
 protein_tools.py | 63 +++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 36 deletions(-)

diff --git a/protein_tools.py b/protein_tools.py
index e55f70f..3c54477 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -1,5 +1,4 @@
 alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
-
 amino_acid_masses = {
     'A': 71.03711,
     'R': 156.10111,
@@ -22,9 +21,6 @@
     'Y': 163.06333,
     'V': 99.06841
 }
-
-gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
-
 codon_table = {
         'A': ['GCT', 'GCC', 'GCA', 'GCG'],
         'C': ['TGT', 'TGC'],
@@ -46,18 +42,7 @@
         'V': ['GTT', 'GTC', 'GTA', 'GTG'],
         'W': ['TGG'],
         'Y': ['TAT', 'TAC']}
-
-
-def is_protein(seq):
-    unique_chars = set(seq)
-    return unique_chars <= alphabet_protein
-
-
-def molecular_weight(seq):
-    molecular_weight = 0
-    for amino_acid in seq:
-        molecular_weight += amino_acid_masses[amino_acid]
-    return round(molecular_weight, 3)
+gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
 
 
 def compute_length(*seqs: str):
@@ -67,20 +52,16 @@ def compute_length(*seqs: str):
     """
     lens = []
     for seq in seqs:
-        if is_protein(seq):
-            lens.append(len(seq))
-        else:
-            raise ValueError('Not a protein')
+        lens.append(len(seq))
     return lens if len(lens) > 1 else lens[0]    
 
 
 def level_of_hydrophobic(protein):
 
     count_of_gydrophobic = 0
-    if is_protein(protein):
-        for i in range(len(protein)):
-            if protein[i] in gydrophobic_aminoacids:
-                count_of_gydrophobic += 1
+    for i in range(len(protein)):
+        if protein[i] in gydrophobic_aminoacids:
+            count_of_gydrophobic += 1
 
     percentage = count_of_gydrophobic / len(protein) * 100
 
@@ -90,7 +71,7 @@ def level_of_hydrophobic(protein):
 def translation(seq):
     """
     """
-      triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
+    triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
     protein = []
     for triplet in triplets:
         for aminoacid in codon_table.keys():
@@ -108,18 +89,28 @@ def translation(seq):
 def mutations(seq, protein):
     correct_protein = translation(seq)
 
-    if is_protein(protein):
-        bank_of_mutations = []
-        for i in range(len(correct_protein)):
-            if correct_protein[i] != protein[i]:
-                bank_of_mutations.append(f'{protein[i]}{i + 1}')
+    
+    bank_of_mutations = []
+    for i in range(len(correct_protein)):
+        if correct_protein[i] != protein[i]:
+            bank_of_mutations.append(f'{protein[i]}{i + 1}')
 
-        if len(bank_of_mutations) == 0:
-            return "Protein without mutations."
-        else:
-            return "Mutations:" + ", ".join(bank_of_mutations) + "."
+    if len(bank_of_mutations) == 0:
+        return "Protein without mutations."
     else:
-        return "It isn't a protein."
+        return "Mutations:" + ", ".join(bank_of_mutations) + "."
+
+
+def is_protein(seq):
+    unique_chars = set(seq)
+    return unique_chars <= alphabet_protein
+
+
+def molecular_weight(seq):
+    molecular_weight = 0
+    for amino_acid in seq:
+        molecular_weight += amino_acid_masses[amino_acid]
+    return round(molecular_weight, 3)
 
 
 def run_protein_tools(*seqs_and_procedure):
@@ -139,4 +130,4 @@ def run_protein_tools(*seqs_and_procedure):
         return results[0]
     else:
         return results
-      
+      
\ No newline at end of file

From 34ed7cd47a48200b00cd6af0c91e157e1fdfb5ae Mon Sep 17 00:00:00 2001
From: rereremin <114501294+rereremin@users.noreply.github.com>
Date: Wed, 27 Sep 2023 18:14:29 +0300
Subject: [PATCH 08/25] Update functions and dictionaries in protein_tools.py

---
 protein_tools.py | 188 +++++++++++++++++++++++++++++------------------
 1 file changed, 117 insertions(+), 71 deletions(-)

diff --git a/protein_tools.py b/protein_tools.py
index 0b4de71..3fbd93d 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -1,65 +1,7 @@
+alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
 
-def level_of_hydrophobic(protein):
-    gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
-
-    count_of_gydrophobic = 0
-    if is_protein(protein):
-        for i in range(len(protein)):
-            if protein[i] in gydrophobic_aminoacids:
-                count_of_gydrophobic += 1
-
-    percentage = count_of_gydrophobic / len(protein) * 100
-
-    return f"Percentage of gydrophobic aminoacids in {protein} = {percentage}%."
-
-
-def translation(seq):
-    """
-    """
-    gene_code = {
-        "F": ["UUC", "UUU"], "L": ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"],
-        "I": ["AUU", "AUC", "AUA"], "M": ["AUG"], "V": ["GUU", "GUC", "GUA", "GUG"],
-        "S": ["UCU", "UCC", "UCA", "UCG"], "P": ["CCU", "CCC", "CCA", "CCG"],
-        "T": ["ACU", "ACC", "ACA", "ACG"], "A": ["GCU", "GCC", "GCA", "GCG"],
-        "Y": ["UAC", "UAU"], "*": ["UAA", "UAG", "UGA"], "H": ["CAU", "CAC"],
-        "Q": ["CAA", "CAG"], "N": ["AAU", "AAC"],
-        "K": ["AAA", "AAG"], "D": ["GAU", "GAC"], "E": ["GAA", "GAG"],
-        "C": ["UGU", "UGC"], "W": ["UGG"], "R": ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"],
-        "S": ["AGU", "AGC"], "G": ["GGU", "GGC", "GGA", "GGG"]
-    }
-    triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
-    protein = []
-    for triplet in triplets:
-        for aminoacid in gene_code.keys():
-            if triplet in gene_code[aminoacid]:
-                protein.append(aminoacid)
-
-    if is_protein("".join(protein)):
-        start = protein.index("M")
-        stop = protein.index("*")
-        return "".join(protein[start:stop + 1])
-    else:
-        return "This sequence doesn't include the gene."
-
-
-def mutations(seq, protein):
-    correct_protein = translation(seq)
-
-    if is_protein(protein):
-        bank_of_mutations = []
-        for i in range(len(correct_protein)):
-            if correct_protein[i] != protein[i]:
-                bank_of_mutations.append(f'{protein[i]}{i + 1}')
-
-        if len(bank_of_mutations) == 0:
-            return "Protein without mutations."
-        else:
-            return "Mutations:" + ", ".join(bank_of_mutations) + "."
-    else:
-        return "It isn't a protein."
-
+alphabet_rna = {'A', 'U', 'G', 'C'}
 
-alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
 amino_acid_masses = {
     'A': 71.03711,
     'R': 156.10111,
@@ -83,34 +25,138 @@ def mutations(seq, protein):
     'V': 99.06841
 }
 
+gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
+
+dna_codons = {
+        'A': ['GCT', 'GCC', 'GCA', 'GCG'],
+        'C': ['TGT', 'TGC'],
+        'D': ['GAT', 'GAC'],
+        'E': ['GAA', 'GAG'],
+        'F': ['TTT', 'TTC'],
+        'G': ['GGT', 'GGC', 'GGA', 'GGG'],
+        'H': ['CAT', 'CAC'],
+        'I': ['ATT', 'ATC', 'ATA'],
+        'K': ['AAA', 'AAG'],
+        'L': ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
+        'M': ['ATG'],
+        'N': ['AAT', 'AAC'],
+        'P': ['CCT', 'CCC', 'CCA', 'CCG'],
+        'Q': ['CAA', 'CAG'],
+        'R': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
+        'S': ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'],
+        'T': ['ACT', 'ACC', 'ACA', 'ACG'],
+        'V': ['GTT', 'GTC', 'GTA', 'GTG'],
+        'W': ['TGG'],
+        'Y': ['TAT', 'TAC'],
+        '*': ["UAA", "UAG", "UGA"]}
+
+rna_codons = {
+        "F": ["UUC", "UUU"], "L": ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"],
+        "I": ["AUU", "AUC", "AUA"], "M": ["AUG"], "V": ["GUU", "GUC", "GUA", "GUG"],
+        "S": ["UCU", "UCC", "UCA", "UCG"], "P": ["CCU", "CCC", "CCA", "CCG"],
+        "T": ["ACU", "ACC", "ACA", "ACG"], "A": ["GCU", "GCC", "GCA", "GCG"],
+        "Y": ["UAC", "UAU"], "*": ["UAA", "UAG", "UGA"], "H": ["CAU", "CAC"],
+        "Q": ["CAA", "CAG"], "N": ["AAU", "AAC"],
+        "K": ["AAA", "AAG"], "D": ["GAU", "GAC"], "E": ["GAA", "GAG"],
+        "C": ["UGU", "UGC"], "W": ["UGG"], "R": ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"],
+        "S": ["AGU", "AGC"], "G": ["GGU", "GGC", "GGA", "GGG"]
+    }
+
 
 def is_protein(seq):
-    unique_chars = set(seq)
+    unique_chars = set(seq.upper())
     return unique_chars <= alphabet_protein
 
 
-def molecular_weight(seq):
+def is_rna(seq):
+    unique_chars = set(seq.upper())
+    return unique_chars <= alphabet_rna
+
+
+def compute_molecular_weight(seq):
     molecular_weight = 0
     for amino_acid in seq:
         molecular_weight += amino_acid_masses[amino_acid]
     return round(molecular_weight, 3)
 
 
+def compute_length(seq: str):
+    """
+    Compute the length of the input amino acid sequence
+    
+    """
+    return len(seq)    
+
+
+def compute_hydrophobicity(protein):
+
+    count_of_gydrophobic = 0
+    if is_protein(protein):
+        for i in range(len(protein)):
+            if protein[i] in gydrophobic_aminoacids:
+                count_of_gydrophobic += 1
+
+    percentage = round(count_of_gydrophobic / len(protein) * 100, 3)
+
+    return f"Percentage of gydrophobic aminoacids in {protein} = {percentage}%."
+
+
+def translation(seq):
+    """
+    """
+    triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
+    protein = []
+    for triplet in triplets:
+        for aminoacid in rna_codons.keys():
+            if triplet in rna_codons[aminoacid]:
+                protein.append(aminoacid)
+
+    start = protein.index("M")
+    stop = protein.index("*")
+    return "".join(protein[start:stop + 1])
+
+
+def check_mutations(seq, protein):
+
+    if is_protein(protein[:-1]) is not True:
+        raise ValueError("Invalid protein sequence")
+    if is_rna(seq) is not True:
+        raise ValueError("Invalid RNA sequence")
+
+    correct_protein = translation(seq)
+    bank_of_mutations = []
+    
+    for i in range(len(correct_protein)):
+        if correct_protein[i] != protein[i]:
+            bank_of_mutations.append(f'{protein[i]}{i + 1}')
+
+    if len(bank_of_mutations) == 0:
+        return "Protein without mutations."
+    else:
+        return "Mutations:" + ", ".join(bank_of_mutations) + "."
+
+
 def run_protein_tools(*seqs_and_procedure):
     procedure = seqs_and_procedure[-1]
     seqs = seqs_and_procedure[:-1]
 
     results = []
-
-    for seq in seqs:
-        seq = seq.upper()
-        if is_protein(seq) is not True:
-            raise ValueError("Invalid alphabet")
-        if procedure == 'molecular_weight':
-            results.append(molecular_weight(seq))
-
+    if procedure == 'check_mutations':
+        results.append(check_mutations(seqs[0], seqs[1]))
+        
+    else:
+        for seq in seqs:
+            seq = seq.upper()
+            if is_protein(seq) is not True:
+                raise ValueError("Invalid protein sequence")
+            if procedure == 'compute_molecular_weight':
+                results.append(molecular_weight(seq))
+            elif procedure == 'compute_length':
+                results.append(compute_length(seq))
+            elif procedure == 'compute_hydrophobicity':
+                results.append(compute_hydrophobicity(seq))
     if len(results) == 1:
         return results[0]
     else:
         return results
-
+      

From 21c12af1695547bf7aa5fd7180cf5f3a02f7ea79 Mon Sep 17 00:00:00 2001
From: Artyom <toropov.01@bk.ru>
Date: Wed, 27 Sep 2023 19:31:09 +0300
Subject: [PATCH 09/25] Change functions names and extend run_protein_tools

---
 protein_tools.py | 117 +++++++++++++++++++++++++++++------------------
 1 file changed, 73 insertions(+), 44 deletions(-)

diff --git a/protein_tools.py b/protein_tools.py
index 3c54477..6fb0943 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -1,4 +1,7 @@
 alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
+
+alphabet_rna = {'A', 'U', 'G', 'C'}
+
 amino_acid_masses = {
     'A': 71.03711,
     'R': 156.10111,
@@ -21,7 +24,10 @@
     'Y': 163.06333,
     'V': 99.06841
 }
-codon_table = {
+
+gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
+
+dna_codons = {
         'A': ['GCT', 'GCC', 'GCA', 'GCG'],
         'C': ['TGT', 'TGC'],
         'D': ['GAT', 'GAC'],
@@ -41,29 +47,56 @@
         'T': ['ACT', 'ACC', 'ACA', 'ACG'],
         'V': ['GTT', 'GTC', 'GTA', 'GTG'],
         'W': ['TGG'],
-        'Y': ['TAT', 'TAC']}
-gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
+        'Y': ['TAT', 'TAC'],
+        '*': ["UAA", "UAG", "UGA"]}
+
+rna_codons = {
+        "F": ["UUC", "UUU"], "L": ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"],
+        "I": ["AUU", "AUC", "AUA"], "M": ["AUG"], "V": ["GUU", "GUC", "GUA", "GUG"],
+        "S": ["UCU", "UCC", "UCA", "UCG"], "P": ["CCU", "CCC", "CCA", "CCG"],
+        "T": ["ACU", "ACC", "ACA", "ACG"], "A": ["GCU", "GCC", "GCA", "GCG"],
+        "Y": ["UAC", "UAU"], "*": ["UAA", "UAG", "UGA"], "H": ["CAU", "CAC"],
+        "Q": ["CAA", "CAG"], "N": ["AAU", "AAC"],
+        "K": ["AAA", "AAG"], "D": ["GAU", "GAC"], "E": ["GAA", "GAG"],
+        "C": ["UGU", "UGC"], "W": ["UGG"], "R": ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"],
+        "S": ["AGU", "AGC"], "G": ["GGU", "GGC", "GGA", "GGG"]
+    }
+
+
+def is_protein(seq):
+    unique_chars = set(seq.upper())
+    return unique_chars <= alphabet_protein
 
 
-def compute_length(*seqs: str):
+def is_rna(seq):
+    unique_chars = set(seq.upper())
+    return unique_chars <= alphabet_rna
+
+
+def compute_molecular_weight(seq):
+    molecular_weight = 0
+    for amino_acid in seq:
+        molecular_weight += amino_acid_masses[amino_acid]
+    return round(molecular_weight, 3)
+
+
+def compute_length(seq: str):
     """
     Compute the length of the input amino acid sequence
     
     """
-    lens = []
-    for seq in seqs:
-        lens.append(len(seq))
-    return lens if len(lens) > 1 else lens[0]    
+    return len(seq)    
 
 
-def level_of_hydrophobic(protein):
+def compute_hydrophobicity(protein):
 
     count_of_gydrophobic = 0
-    for i in range(len(protein)):
-        if protein[i] in gydrophobic_aminoacids:
-            count_of_gydrophobic += 1
+    if is_protein(protein):
+        for i in range(len(protein)):
+            if protein[i] in gydrophobic_aminoacids:
+                count_of_gydrophobic += 1
 
-    percentage = count_of_gydrophobic / len(protein) * 100
+    percentage = round(count_of_gydrophobic / len(protein) * 100, 3)
 
     return f"Percentage of gydrophobic aminoacids in {protein} = {percentage}%."
 
@@ -74,23 +107,25 @@ def translation(seq):
     triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
     protein = []
     for triplet in triplets:
-        for aminoacid in codon_table.keys():
-            if triplet in codon_table[aminoacid]:
+        for aminoacid in rna_codons.keys():
+            if triplet in rna_codons[aminoacid]:
                 protein.append(aminoacid)
 
-    if is_protein("".join(protein)):
-        start = protein.index("M")
-        stop = protein.index("*")
-        return "".join(protein[start:stop + 1])
-    else:
-        return "This sequence doesn't include the gene."
+    start = protein.index("M")
+    stop = protein.index("*")
+    return "".join(protein[start:stop + 1])
 
 
-def mutations(seq, protein):
-    correct_protein = translation(seq)
+def check_mutations(seq, protein):
 
-    
+    if is_protein(protein[:-1]) is not True:
+        raise ValueError("Invalid protein sequence")
+    if is_rna(seq) is not True:
+        raise ValueError("Invalid RNA sequence")
+
+    correct_protein = translation(seq)
     bank_of_mutations = []
+    
     for i in range(len(correct_protein)):
         if correct_protein[i] != protein[i]:
             bank_of_mutations.append(f'{protein[i]}{i + 1}')
@@ -101,31 +136,25 @@ def mutations(seq, protein):
         return "Mutations:" + ", ".join(bank_of_mutations) + "."
 
 
-def is_protein(seq):
-    unique_chars = set(seq)
-    return unique_chars <= alphabet_protein
-
-
-def molecular_weight(seq):
-    molecular_weight = 0
-    for amino_acid in seq:
-        molecular_weight += amino_acid_masses[amino_acid]
-    return round(molecular_weight, 3)
-
-
 def run_protein_tools(*seqs_and_procedure):
     procedure = seqs_and_procedure[-1]
     seqs = seqs_and_procedure[:-1]
 
     results = []
-
-    for seq in seqs:
-        seq = seq.upper()
-        if is_protein(seq) is not True:
-            raise ValueError("Invalid alphabet")
-        if procedure == 'molecular_weight':
-            results.append(molecular_weight(seq))
-
+    if procedure == 'check_mutations':
+        results.append(check_mutations(seqs[0], seqs[1]))
+        
+    else:
+        for seq in seqs:
+            seq = seq.upper()
+            if is_protein(seq) is not True:
+                raise ValueError("Invalid protein sequence")
+            if procedure == 'compute_molecular_weight':
+                results.append(molecular_weight(seq))
+            elif procedure == 'compute_length':
+                results.append(compute_length(seq))
+            elif procedure == 'compute_hydrophobicity':
+                results.append(compute_hydrophobicity(seq))
     if len(results) == 1:
         return results[0]
     else:

From aa4618c0480c8f91f75014675f9bd1384c3d6f63 Mon Sep 17 00:00:00 2001
From: Artyom <toropov.01@bk.ru>
Date: Fri, 29 Sep 2023 02:22:49 +0300
Subject: [PATCH 10/25] Add docstrings and revise run_protein_tools function

---
 protein_tools.py | 78 ++++++++++++++++++++++++++++++------------------
 1 file changed, 49 insertions(+), 29 deletions(-)

diff --git a/protein_tools.py b/protein_tools.py
index 6fb0943..0881b2a 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -63,45 +63,54 @@
     }
 
 
-def is_protein(seq):
+def is_protein(seq:str):
+    """
+    Check the existence of a protein sequence, return boolean.
+    """
     unique_chars = set(seq.upper())
     return unique_chars <= alphabet_protein
 
 
-def is_rna(seq):
+def is_rna(seq:str):
+    """
+    Check the existence of a RNA sequence, return boolean.
+    """
     unique_chars = set(seq.upper())
     return unique_chars <= alphabet_rna
 
 
-def compute_molecular_weight(seq):
+def compute_molecular_weight(seq:str):
+    """
+    Compute molecular weight (g/mol) of protein sequence.
+    """
     molecular_weight = 0
-    for amino_acid in seq:
+    for amino_acid in seq.upper():
         molecular_weight += amino_acid_masses[amino_acid]
     return round(molecular_weight, 3)
 
 
-def compute_length(seq: str):
+def compute_length(seq:str):
     """
-    Compute the length of the input amino acid sequence
-    
+    Compute the length of protein sequence.
     """
     return len(seq)    
 
 
-def compute_hydrophobicity(protein):
-
+def compute_hydrophobicity(protein:str):
+    """
+    Compute the percentage of gydrophobic aminoacids in protein sequence.
+    """
     count_of_gydrophobic = 0
-    if is_protein(protein):
-        for i in range(len(protein)):
-            if protein[i] in gydrophobic_aminoacids:
-                count_of_gydrophobic += 1
+    for i in range(len(protein)):
+        if protein[i] in gydrophobic_aminoacids:
+            count_of_gydrophobic += 1
 
     percentage = round(count_of_gydrophobic / len(protein) * 100, 3)
 
     return f"Percentage of gydrophobic aminoacids in {protein} = {percentage}%."
 
 
-def translation(seq):
+def translation(seq:str):
     """
     """
     triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
@@ -116,8 +125,9 @@ def translation(seq):
     return "".join(protein[start:stop + 1])
 
 
-def check_mutations(seq, protein):
-
+def check_mutations(seq:str, protein:str):
+    """
+    """
     if is_protein(protein[:-1]) is not True:
         raise ValueError("Invalid protein sequence")
     if is_rna(seq) is not True:
@@ -136,27 +146,37 @@ def check_mutations(seq, protein):
         return "Mutations:" + ", ".join(bank_of_mutations) + "."
 
 
-def run_protein_tools(*seqs_and_procedure):
-    procedure = seqs_and_procedure[-1]
-    seqs = seqs_and_procedure[:-1]
-
+def run_protein_tools(*args:str):
+    """
+    Function containing methods for protein analysis.
+    
+    Takes arbitrary number of arguments with protein sequencies
+    and the name of the procedure to be performed (always the 
+    last argument). Returns the result of the procedure as string 
+    if one sequnce is submitted or list if several.
+
+    If procedure 'check_mutations' is used then input must be only three
+    arguments: RNA sequence, protein sequence and the name of procedure 
+    itself.
+    """
+    *seqs, procedure = args
     results = []
+    d_of_functions = {'compute_molecular_weight': compute_molecular_weight, 
+                  'compute_length': compute_length,
+                  'compute_hydrophobicity': compute_hydrophobicity,
+                 }
     if procedure == 'check_mutations':
         results.append(check_mutations(seqs[0], seqs[1]))
-        
     else:
         for seq in seqs:
-            seq = seq.upper()
             if is_protein(seq) is not True:
                 raise ValueError("Invalid protein sequence")
-            if procedure == 'compute_molecular_weight':
-                results.append(molecular_weight(seq))
-            elif procedure == 'compute_length':
-                results.append(compute_length(seq))
-            elif procedure == 'compute_hydrophobicity':
-                results.append(compute_hydrophobicity(seq))
+            if procedure not in d_of_functions:
+                raise ValueError("Wrong procedure name")
+            else:
+                results.append(d_of_functions[procedure](seq))
     if len(results) == 1:
         return results[0]
     else:
         return results
-      
\ No newline at end of file
+        
\ No newline at end of file

From ed17f8a4c8c6254a208a8b04237632e2467c74fb Mon Sep 17 00:00:00 2001
From: Artem Toropov <144557024+artyomtorr@users.noreply.github.com>
Date: Fri, 29 Sep 2023 00:35:50 +0300
Subject: [PATCH 11/25] Update README.md

---
 README.md | 80 +++++++++++++++++--------------------------------------
 1 file changed, 25 insertions(+), 55 deletions(-)

diff --git a/README.md b/README.md
index f918170..a93e4cf 100644
--- a/README.md
+++ b/README.md
@@ -1,65 +1,35 @@
-# HW 4. Functions 2
-> *This is the repo for the fourth homework of the BI Python 2023 course*
+# protein_tools.py
 
-### Homework description
+**protein_tools.py** - is a tool which allows the performing of various procedures for a user entered protein sequences. 
 
-На прошлой неделе вы делали утилиту для работы с последовательностями нуклеиновых кислот (с весьма строгим ТЗ). Пришло время для чего-то более самостоятельного. 
+### Usage
 
-#### Основное задание
+The tool works by calling the function `run_protein_tools`, which takes arbitrary number of arguments with protein sequencies (*str*) and the name of the procedure to be performed (always the last argument, *str*, see the usage examples below). The output is the result of the procedure as *string* if one sequence is submitted or *list* if several.
 
+**NOTE:**  For the procedure `check_mutations` a fixed number of string arguments are used: one RNA sequence, one protein sequence and the name of procedure itself.
 
-Напишите утилиту для работы с последовательностями белков. Там должно быть минимум 5 различных операций, должна быть какая-то точка входа через которую пользователь будет всё это дело использовать. На этом, по сути, всё. Всё целиком зависит от вашей фантазии и креативности. Можете опираться на ДЗ №2 и №3. 
+### Procedures
 
-Самая главная часть задания - это файл `README.md`. Сделайте краткое введение, напишите описание тула, приведите документацию по использованию со списком аргументов. Добавьте примеры использования. Возможно, вы захотите сделать секцию Troubleshooting. ***Почему это нужно?*** В этот раз проверяющий не будет знать того, как должен работать ваш тул. Это ваш авторский код. Даже самая прекрасная функциональность, не будучи отраженной в README, скорее всего останется незамеченной. README - это ваш способ познакомить пользователя с тулом, показать всё лучше и обосновать, почему именно ваша команда должна получить наивысший балл. 
+- `compute_molecular_weight` — computes molecular weight of protein sequence in g/mol
+- `compute_length` — computes the number of amino acids in protein sequence
+- `compute_hydrophobicity` — computes the percentage of gydrophobic aminoacids in protein sequence
+- `check_mutations` — 
 
-Есть люди которые, любят писать документации, а есть те - кто не любит. Найдите в вашей команде того, кто любит. И в будущем в своих рабочих проектах всегда держите рядом такого человек (или будьте им). 
+### Examples
+```python
+run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'GSCKRGPRT', 'compute_length') # [10, 18, 9]
+run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'GSCKRGPRT', 'compute_molecular_weight') # [1055.496, 1886.872, 942.482]
+run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'GSCKRGPRT', 'compute_hydrophobicity') # [50.0, 27.778, 11.111]
 
-Примеры некоторых README, которыми можно вдохновляться:
+```
+   
+### Additional information
+- The program works **only** with protein or RNA sequences. If any of the entered sequences contain inappropriate characters or cannot exist, the program will display an error. Sequences can contain characters of any case.
 
-- [MetaFX](https://github.com/ctlab/metafx), тул Артёма Иванова. Там еще и [wiki](https://github.com/ctlab/metafx/wiki) крутое.
-- [samovar](https://github.com/nvaulin/samovar)
-- [MetaGEM](https://github.com/franciscozorrilla/metaGEM)
-- [Pharokka](https://github.com/gbouras13/pharokka)
+```python
+run_protein_tools('ATA', 'DefinitelyNotDNA', 'transcribe') # ValueError: Invalid alpabet
+run_protein_tools('ATGU', 'reverse') # ValueError: Invalid alpabet
+```
 
-Типовые секции, на которые стоит обратить внимание: Title, Overview, Usage, Options, Examples, Troubleshooting, Contacts.
-
-**Tехническое требование к заданию.**
-
-Это задание будет выполняться в командах по 3 человека. Каждый из членов команды должен внести <ins>***как минимум***</ins> 2 функции. Каждое внесение функции должно сопровождаться коммитом с осмысленным описанием коммита. Ниже приведена последовательность действий для успешного выполнения задания (аналогично ДЗ №2):
-
-1. Посмотрите состав своей команды здесь ([**ССЫЛКА**](https://docs.google.com/spreadsheets/d/1KMBBBu8LqauRpDJb0v1ldPwpvzNn8-KakcHexAcqLsE/edit?usp=sharing)). 
-2. Тимлид делает форк данного репозитория. **В форке создает ветку `HW4_<surname>`, в ветке создает папку `HW4_<surname>`, в этой папке вы всё делаете.**
-3. Члены команды могут либо делать свои форки, либо работать в репозитории тимлида в качестве колабораторов ("contributors"). В любом случае делаете клоны => пишите код локально => пушите.
-4. В конце тимлид делайет pull-request из `HW4_<surname>` своего репозитория в `main` этого.
-
-
-А также:
-- Сопроводите программу лучшим `README.md` файлом в вашей жизни (на английском языке).
-- В этом ДЗ проблемы с качеством кода (нейминги, пустые строки, анноатции типов, док.стринги, пробелы) могут привести к снижению балла. Воспользуйтесь линтерами чтобы себя обезопасить. IDE по типу PyCharm или VSCode имеют фунцонал по авто-исправлению многих проблем такого рода. 
-
-Автотестов на GitHub в этом ДЗ нет, но вы можете прогнать линтеры на качество кода локально (как в ДЗ №3, подробнее читайте [тут](https://plausible-cannon-091.notion.site/Code-auto-checks-02b2ea69c1d545fca07b50ce5933ed5f?pvs=4)). 
-
-- Программа должна сохранять регистр символов.
-- Программа должна работать только с последовательностями белков.
-- Запрещается использование сторонних модулей.
-
-
-### Форма сдачи
-
-Прикрепите ссылку на pull-request тимлида в Google Class (можете сделать от лица каждого члена команды, но это не обязательно).
-
-
-### Pазбалловка
-
-- За каждую из 5 операций - максимум **1.5 балла**
-- За README - максимум **2.5 балла**
-- Если вы не внесли как минимум 2 функции от себя, вы получаете 0 баллов (на баллы остальных членов команды это не влияет).
-- За фото созвона в README можно получить 0.2 доп. балла (но не более 10 баллов суммарно)
-
-
-
-### **Предполагаемый учебный результат**
-
-Это задание позволит вам проявить креативность и учиться быть не только кодером, но и автором. Также это задание поможет окончательно закрепить материал по функциям который мы прошли.
-
-Удачи! ✨✨
+### Contacts
+Author contributions:

From a25248ca9525748800ee83cf47362291fbf797a0 Mon Sep 17 00:00:00 2001
From: Artem Toropov <144557024+artyomtorr@users.noreply.github.com>
Date: Fri, 29 Sep 2023 16:52:17 +0300
Subject: [PATCH 12/25] Add contacts to README.md

---
 README.md | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index a93e4cf..241b744 100644
--- a/README.md
+++ b/README.md
@@ -13,23 +13,34 @@ The tool works by calling the function `run_protein_tools`, which takes arbitrar
 - `compute_molecular_weight` — computes molecular weight of protein sequence in g/mol
 - `compute_length` — computes the number of amino acids in protein sequence
 - `compute_hydrophobicity` — computes the percentage of gydrophobic aminoacids in protein sequence
-- `check_mutations` — 
+- `check_mutations` —
+- 
 
 ### Examples
 ```python
 run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'GSCKRGPRT', 'compute_length') # [10, 18, 9]
 run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'GSCKRGPRT', 'compute_molecular_weight') # [1055.496, 1886.872, 942.482]
 run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'GSCKRGPRT', 'compute_hydrophobicity') # [50.0, 27.778, 11.111]
-
+run_protein_tools('AUGGAUCAUcAAUAA', 'MDKL*', 'check_mutations') #'Mutations:K3, L4.'
 ```
    
 ### Additional information
-- The program works **only** with protein or RNA sequences. If any of the entered sequences contain inappropriate characters or cannot exist, the program will display an error. Sequences can contain characters of any case.
+- The program works **only** with protein and RNA sequences. If any of the entered sequences contain inappropriate characters or cannot exist, the program will display an error. Sequences can contain characters of any case.
 
 ```python
-run_protein_tools('ATA', 'DefinitelyNotDNA', 'transcribe') # ValueError: Invalid alpabet
-run_protein_tools('ATGU', 'reverse') # ValueError: Invalid alpabet
+run_protein_tools('PROTEIN', 'compute_molecular_weight') # ValueError: Invalid protein sequence
+run_protein_tools('AUGGAU_AUcAAUAA', 'MDKL*', 'check_mutations')# ValueError: Invalid RNA sequence
 ```
 
 ### Contacts
-Author contributions:
+Please use contacts below to reach out with any comments, concerns, or discussions regarding **protein_tools.py.** <br>
+- Artyom Toropov ([@artyomtorr](github.com/artyomtorr)) <br>
+- Sofiya Vinogradova ([@sofiyaga57](github.com/sofiyaga57)) <br>
+- Nikita Zherko ([@rereremin](github.com/rereremin)) <br>
+![изображение](https://github.com/artyomtorr/HW4_Functions2/assets/144557024/88f1c523-711a-40d7-9134-30c6b6639037)
+
+
+*Author contributions:* <br> 
+Artyom Toropov (teamlead): functions *is_protein*, *molecular_weight*, *run_protein_tools* <br> 
+Sofiya Vinogradova: functions ..., <br> 
+Nikita Zherko: functions *compute_hydrophobicity*, *check_mutations*.

From 7d1e07ef80d07b7cba878b5f22434e6e4ee17814 Mon Sep 17 00:00:00 2001
From: Artem Toropov <144557024+artyomtorr@users.noreply.github.com>
Date: Fri, 29 Sep 2023 17:15:27 +0300
Subject: [PATCH 13/25] Update README.md

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 241b744..b6b838a 100644
--- a/README.md
+++ b/README.md
@@ -34,13 +34,13 @@ run_protein_tools('AUGGAU_AUcAAUAA', 'MDKL*', 'check_mutations')# ValueError: In
 
 ### Contacts
 Please use contacts below to reach out with any comments, concerns, or discussions regarding **protein_tools.py.** <br>
-- Artyom Toropov ([@artyomtorr](github.com/artyomtorr)) <br>
-- Sofiya Vinogradova ([@sofiyaga57](github.com/sofiyaga57)) <br>
-- Nikita Zherko ([@rereremin](github.com/rereremin)) <br>
+- Artyom Toropov ([@artyomtorr](https://github.com/artyomtorr/)) <br>
+- Sofiya Vinogradova ([@sofiyaga57](https://github.com/sofiyaga57/)) <br>
+- Nikita Zherko ([@rereremin](https://github.com/rereremin/)) <br>
 ![изображение](https://github.com/artyomtorr/HW4_Functions2/assets/144557024/88f1c523-711a-40d7-9134-30c6b6639037)
 
 
 *Author contributions:* <br> 
-Artyom Toropov (teamlead): functions *is_protein*, *molecular_weight*, *run_protein_tools* <br> 
+Artyom Toropov (teamlead): functions `is_protein`, `compute_molecular_weight`, `run_protein_tools` <br> 
 Sofiya Vinogradova: functions ..., <br> 
-Nikita Zherko: functions *compute_hydrophobicity*, *check_mutations*.
+Nikita Zherko: functions `compute_hydrophobicity`, `check_mutations`

From 140222d3a18f8bc0826dfb25ba5ff68d107d5420 Mon Sep 17 00:00:00 2001
From: rereremin <114501294+rereremin@users.noreply.github.com>
Date: Fri, 29 Sep 2023 22:01:14 +0300
Subject: [PATCH 14/25] Add docstrings to protein_tools.py

---
 protein_tools.py | 78 ++++++++++++++++++++++++++++++------------------
 1 file changed, 49 insertions(+), 29 deletions(-)

diff --git a/protein_tools.py b/protein_tools.py
index 3fbd93d..0452f71 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -63,45 +63,54 @@
     }
 
 
-def is_protein(seq):
+def is_protein(seq:str):
+    """
+    Check the existence of a protein sequence, return boolean.
+    """
     unique_chars = set(seq.upper())
     return unique_chars <= alphabet_protein
 
 
-def is_rna(seq):
+def is_rna(seq:str):
+    """
+    Check the existence of a RNA sequence, return boolean.
+    """
     unique_chars = set(seq.upper())
     return unique_chars <= alphabet_rna
 
 
-def compute_molecular_weight(seq):
+def compute_molecular_weight(seq:str):
+    """
+    Compute molecular weight (g/mol) of protein sequence.
+    """
     molecular_weight = 0
-    for amino_acid in seq:
+    for amino_acid in seq.upper():
         molecular_weight += amino_acid_masses[amino_acid]
     return round(molecular_weight, 3)
 
 
-def compute_length(seq: str):
+def compute_length(seq:str):
     """
-    Compute the length of the input amino acid sequence
-    
+    Compute the length of protein sequence.
     """
     return len(seq)    
 
 
-def compute_hydrophobicity(protein):
-
+def compute_hydrophobicity(protein:str):
+    """
+    Compute the percentage of gydrophobic aminoacids in protein sequence.
+    """
     count_of_gydrophobic = 0
-    if is_protein(protein):
-        for i in range(len(protein)):
-            if protein[i] in gydrophobic_aminoacids:
-                count_of_gydrophobic += 1
+    for i in range(len(protein)):
+        if protein[i] in gydrophobic_aminoacids:
+            count_of_gydrophobic += 1
 
     percentage = round(count_of_gydrophobic / len(protein) * 100, 3)
 
     return f"Percentage of gydrophobic aminoacids in {protein} = {percentage}%."
 
 
-def translation(seq):
+def translation(seq:str):
     """
     """
     triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
@@ -116,8 +125,9 @@ def translation(seq):
     return "".join(protein[start:stop + 1])
 
 
-def check_mutations(seq, protein):
-
+def check_mutations(seq:str, protein:str):
+    """
+    """
     if is_protein(protein[:-1]) is not True:
         raise ValueError("Invalid protein sequence")
     if is_rna(seq) is not True:
@@ -136,27 +146,37 @@ def check_mutations(seq, protein):
         return "Mutations:" + ", ".join(bank_of_mutations) + "."
 
 
-def run_protein_tools(*seqs_and_procedure):
-    procedure = seqs_and_procedure[-1]
-    seqs = seqs_and_procedure[:-1]
-
+def run_protein_tools(*args:str):
+    """
+    Function containing methods for protein analysis.
+    
+    Takes arbitrary number of arguments with protein sequencies
+    and the name of the procedure to be performed (always the 
+    last argument). Returns the result of the procedure as string 
+    if one sequnce is submitted or list if several.
+
+    If procedure 'check_mutations' is used then input must be only three
+    arguments: RNA sequence, protein sequence and the name of procedure 
+    itself.
+    """
+    *seqs, procedure = args
     results = []
+    d_of_functions = {'compute_molecular_weight': compute_molecular_weight, 
+                  'compute_length': compute_length,
+                  'compute_hydrophobicity': compute_hydrophobicity,
+                 }
     if procedure == 'check_mutations':
         results.append(check_mutations(seqs[0], seqs[1]))
-        
     else:
         for seq in seqs:
-            seq = seq.upper()
             if is_protein(seq) is not True:
                 raise ValueError("Invalid protein sequence")
-            if procedure == 'compute_molecular_weight':
-                results.append(molecular_weight(seq))
-            elif procedure == 'compute_length':
-                results.append(compute_length(seq))
-            elif procedure == 'compute_hydrophobicity':
-                results.append(compute_hydrophobicity(seq))
+            if procedure not in d_of_functions:
+                raise ValueError("Wrong procedure name")
+            else:
+                results.append(d_of_functions[procedure](seq))
     if len(results) == 1:
         return results[0]
     else:
         return results
-      
+        

From f85159cde2a2e073ec17771d1d0ec4552fa7f7fe Mon Sep 17 00:00:00 2001
From: nikita <zherko.na@phystech.edu>
Date: Fri, 29 Sep 2023 23:59:53 +0300
Subject: [PATCH 15/25] Update translation and check_mutations with raise and
 add docstrings

---
 protein_tools.py | 57 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/protein_tools.py b/protein_tools.py
index 0452f71..9f4e7dc 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -96,9 +96,16 @@ def compute_length(seq:str):
     return len(seq)    
 
 
-def compute_hydrophobicity(protein:str):
+def compute_hydrophobicity(protein:str) -> str:
     """
     Compute the percentage of gydrophobic aminoacids in protein sequence.
+
+    Argument:
+    - protein (str): protein sequence. Include hydrophobic 
+    and hydrophilic aminoacids.
+
+    Return:
+    - str, result of computation percentage of gydrophobic aminoacids.
     """
     count_of_gydrophobic = 0
     for i in range(len(protein)):
@@ -110,8 +117,16 @@ def compute_hydrophobicity(protein:str):
     return f"Percentage of gydrophobic aminoacids in {protein} = {percentage}%."
 
 
-def translation(seq:str):
+def translation(seq:str) -> str:
     """
+    Realize the translation mRNA into protein sequence.
+
+    Argument:
+    - seq (str): mRNA sequence
+
+    Return:
+    - str, protein after translation 
+    Remark: Correct protein sequence starts with "M" and ends with "*".
     """
     triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
     protein = []
@@ -120,18 +135,50 @@ def translation(seq:str):
             if triplet in rna_codons[aminoacid]:
                 protein.append(aminoacid)
 
+    if protein[-1] != "*":
+        raise ValueError("Stop-codon (*) is absent in mRNA")
+    if protein[0] != "M":
+        raise ValueError("Start-codon (M) is absent in mRNA")
+
     start = protein.index("M")
     stop = protein.index("*")
     return "".join(protein[start:stop + 1])
 
 
-def check_mutations(seq:str, protein:str):
+def check_mutations(seq:str, protein:str) -> str:
     """
+    Check mutations in the protein sequence after translation.
+
+    Use additional function "translation(seq)".
+    This function doesn't show mutations, which don't lead to 
+    change aminoacids in protein sequence. 
+
+    Arguments:
+    - seq (str): translation sequence of mRNA with/without mutations
+    - protein (str): protein for comparison with protein after translation.
+    Every protein starts with "M" and ends with "*" (stop-codon). 
+    Remark: is_protein(seq) doesn't see "*", but it's used in the other part of function.
+
+    Return:
+    - str, if mRNA without mutations return "Protein without mutations." 
+    If some mutations in protein, return aminoacid(s) and their position(s)
+
+    Examples:
+    - "AUGGUAGGGAAAUUUUGA", "MVGKF*" ->  "Protein without mutations."
+    - "AUGGUAGGGAAAUUUUGA", "MGGKF*" ->  "Mutations:G2."
+    - "AUGGUAGGGAAAUUUUGA", "MGGVF*" -> "Mutations:G2, V4."
+    - "AUGGUAGGGAAAUUUUGA", "MGGKF" –> ValueError: Stop (*) is absent"
+    - "AUGGUAGGGAAAUUUUGA", "GGKF*" –> ValueError: Start (M) is absent"
+    
     """
     if is_protein(protein[:-1]) is not True:
         raise ValueError("Invalid protein sequence")
     if is_rna(seq) is not True:
         raise ValueError("Invalid RNA sequence")
+    if protein[-1] != "*":
+        raise ValueError("Stop (*) is absent")
+    if protein[0] != "M":
+        raise ValueError("Start (M) is absent")
 
     correct_protein = translation(seq)
     bank_of_mutations = []
@@ -180,3 +227,7 @@ def run_protein_tools(*args:str):
     else:
         return results
         
+print(run_protein_tools("AUGGUAGGGAAAUUUUGA", "MGGKF*", "check_mutations"))
+print(run_protein_tools("GUAGGGAAAUUUUgA", "MGVKF*", "check_mutations"))
+#print(translation("AUGGUAGGGAAAUUUUGA"))
+

From d7575e2d8f8d077980c78b4f4a4772f6a12e947b Mon Sep 17 00:00:00 2001
From: rereremin <114501294+rereremin@users.noreply.github.com>
Date: Sat, 30 Sep 2023 01:02:18 +0300
Subject: [PATCH 16/25] Add raise in check_mutations and change return in
 compute_hydrophobicity

---
 protein_tools.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/protein_tools.py b/protein_tools.py
index ab0e53a..5dc669a 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -96,9 +96,16 @@ def compute_length(seq:str):
     return len(seq)    
 
 
-def compute_hydrophobicity(protein:str) -> str:
+def compute_hydrophobicity(protein:str) -> tuple:
     """
     Compute the percentage of gydrophobic aminoacids in protein sequence.
+
+    Argument:
+    - protein (str): protein sequence. Include hydrophobic 
+    and hydrophilic aminoacids.
+
+    Return:
+    - tuple, result of computation percentage of gydrophobic aminoacids.
     """
     count_of_gydrophobic = 0
     for i in range(len(protein)):
@@ -107,7 +114,7 @@ def compute_hydrophobicity(protein:str) -> str:
 
     percentage = round(count_of_gydrophobic / len(protein) * 100, 3)
 
-    return f"Percentage of gydrophobic aminoacids in {protein} = {percentage}%."
+    return protein, percentage
 
 
 def translate_rna(seq:str) -> str:
@@ -160,9 +167,9 @@ def check_mutations(seq:str, protein:str) -> str:
     Examples:
     - "AUGGUAGGGAAAUUUUGA", "MVGKF*" ->  "Protein without mutations."
     - "AUGGUAGGGAAAUUUUGA", "MGGVF*" -> "Mutations:G2, V4."
-    - "AUGGUAGGGAAAUUUUGA", "MGGKF" –> ValueError: Stop (*) is absent"
-    - "AUGGUAGGGAAAUUUUGA", "GGKF*" –> ValueError: Start (M) is absent"
-    
+    - "AUGGUAGGGAAAUUUUGA", "MGGKF" –> "ValueError: Stop (*) is absent"
+    - "AUGGUAGGGAAAUUUUGA", "GGKF*" –> "ValueError: Start (M) is absent"
+    - "AUGAAAAAAUGA", "MK*" -> "ValueError: Different length of translated protein and protein"
     """
 
     correct_protein = translation(seq)
@@ -176,6 +183,8 @@ def check_mutations(seq:str, protein:str) -> str:
         raise ValueError("Stop (*) is absent")
     if protein[0] != "M":
         raise ValueError("Start (M) is absent")
+    if len(protein) != len(seq)/3:
+        raise ValueError("Different length of translated protein and protein")
     
     for i in range(len(correct_protein)):
         if correct_protein[i] != protein[i]:

From 5fddd4fb15f1b6a70756ac02dd5e0decb20c7e5a Mon Sep 17 00:00:00 2001
From: Artyom <toropov.01@bk.ru>
Date: Sun, 1 Oct 2023 04:35:55 +0300
Subject: [PATCH 17/25] Edit docstrings

---
 protein_tools.py | 61 +++++++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/protein_tools.py b/protein_tools.py
index 5dc669a..68e7bb8 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -79,21 +79,34 @@ def is_rna(seq:str):
     return unique_chars <= alphabet_rna
 
 
-def compute_molecular_weight(seq:str):
+def compute_molecular_weight(seq:str) -> tuple:
     """
     Compute molecular weight (g/mol) of protein sequence.
+    
+    Argument:
+    - protein (str): protein sequence.
+    
+    Return: 
+    - tuple with protein sequence and computed molecular 
+    weight (float rounded to 3 decimal places).
     """
     molecular_weight = 0
     for amino_acid in seq.upper():
         molecular_weight += amino_acid_masses[amino_acid]
-    return round(molecular_weight, 3)
+    return seq, round(molecular_weight, 3)
 
 
-def compute_length(seq:str):
+def compute_length(seq:str) -> tuple:
     """
     Compute the length of protein sequence.
+    
+    Argument:
+    - protein (str): protein sequence.
+    
+    Return: 
+    - tuple with protein sequence and computed length.
     """
-    return len(seq)    
+    return seq, len(seq)    
 
 
 def compute_hydrophobicity(protein:str) -> tuple:
@@ -101,11 +114,11 @@ def compute_hydrophobicity(protein:str) -> tuple:
     Compute the percentage of gydrophobic aminoacids in protein sequence.
 
     Argument:
-    - protein (str): protein sequence. Include hydrophobic 
-    and hydrophilic aminoacids.
+    - protein (str): protein sequence. 
 
     Return:
-    - tuple, result of computation percentage of gydrophobic aminoacids.
+    - tuple with protein sequence and computed percentage 
+    of gydrophobic aminoacids.
     """
     count_of_gydrophobic = 0
     for i in range(len(protein)):
@@ -148,21 +161,21 @@ def translate_rna(seq:str) -> str:
 
 def check_mutations(seq:str, protein:str) -> str:
     """
-    Check mutations in the protein sequence after translation.
+    Check missense mutations in the protein sequence after translation.
 
-    Use additional function "translation(seq)".
-    This function doesn't show mutations, which don't lead to 
-    change aminoacids in protein sequence. 
+    Uses additional function "translate_rna(seq)".
 
     Arguments:
-    - seq (str): translation sequence of mRNA with/without mutations
-    - protein (str): protein for comparison with protein after translation.
-    Every protein starts with "M" and ends with "*" (stop-codon). 
-    Remark: is_protein(seq) doesn't see "*", but it's used in the other part of function.
+    - seq (str): sequence of mRNA with/without mutations.
+    Must contain start-codon and one of the stop-codons.
+    - protein (str): protein sequence translated from mRNA.
+    Must start with "M" and ends with "*" (stop-codon). 
+    
+    Note: is_protein(seq) doesn't see "*", but it's used in the other part of function.
 
     Return:
     - str, if mRNA without mutations return "Protein without mutations." 
-    If some mutations in protein, return aminoacid(s) and their position(s)
+    If there are mutations in protein, returns aminoacid(s) and their position(s)
 
     Examples:
     - "AUGGUAGGGAAAUUUUGA", "MVGKF*" ->  "Protein without mutations."
@@ -172,7 +185,7 @@ def check_mutations(seq:str, protein:str) -> str:
     - "AUGAAAAAAUGA", "MK*" -> "ValueError: Different length of translated protein and protein"
     """
 
-    correct_protein = translation(seq)
+    correct_protein = translate_rna(seq)
     bank_of_mutations = []
     
     if is_protein(protein[:-1]) is not True:
@@ -193,7 +206,7 @@ def check_mutations(seq:str, protein:str) -> str:
     if len(bank_of_mutations) == 0:
         return "Protein without mutations."
     else:
-        return "Mutations:" + ", ".join(bank_of_mutations) + "."
+        return "Mutations: " + ", ".join(bank_of_mutations) + "."
 
 
 def run_protein_tools(*args:str):
@@ -202,12 +215,12 @@ def run_protein_tools(*args:str):
     
     Takes arbitrary number of arguments with protein sequencies
     and the name of the procedure to be performed (always the 
-    last argument). Returns the result of the procedure as string 
-    if one sequnce is submitted or list if several.
+    last argument). Returns the result of the procedure as tuple 
+    if one sequnce is submitted or list of tuples if several.
 
-    If procedure 'check_mutations' is used then input must be only three
-    arguments: RNA sequence, protein sequence and the name of procedure 
-    itself.
+    Note: if procedure 'check_mutations' is used then input must 
+    contain only three arguments: RNA sequence, protein sequence 
+    and the name of procedure itself.
     """
     *seqs, procedure = args
     results = []
@@ -229,4 +242,4 @@ def run_protein_tools(*args:str):
         return results[0]
     else:
         return results
-        
+        
\ No newline at end of file

From 54171eb733b9c84fda2f0b36597c0e0a2cbc3140 Mon Sep 17 00:00:00 2001
From: Artem Toropov <144557024+artyomtorr@users.noreply.github.com>
Date: Sun, 1 Oct 2023 01:46:52 +0300
Subject: [PATCH 18/25] Update README.md

---
 README.md | 46 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index b6b838a..f8b086e 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 ### Usage
 
-The tool works by calling the function `run_protein_tools`, which takes arbitrary number of arguments with protein sequencies (*str*) and the name of the procedure to be performed (always the last argument, *str*, see the usage examples below). The output is the result of the procedure as *string* if one sequence is submitted or *list* if several.
+The tool works by calling the function `run_protein_tools`, which takes arbitrary number of arguments with protein sequencies (*str*) and the name of the procedure to be performed (always the last argument, *str*, see the usage examples below). The output is the result of the procedure as *string, tuple* or *dictionary* if one sequence is submitted or *list* if several.
 
 **NOTE:**  For the procedure `check_mutations` a fixed number of string arguments are used: one RNA sequence, one protein sequence and the name of procedure itself.
 
@@ -13,25 +13,45 @@ The tool works by calling the function `run_protein_tools`, which takes arbitrar
 - `compute_molecular_weight` — computes molecular weight of protein sequence in g/mol
 - `compute_length` — computes the number of amino acids in protein sequence
 - `compute_hydrophobicity` — computes the percentage of gydrophobic aminoacids in protein sequence
-- `check_mutations` —
-- 
+- `check_mutations` — checks missense mutations in the protein sequence after translation
+- `protein_to_nucleic_acid`- returns possible variants of DNAs for a given protein sequence
+- `count_amino_acids` - calculates the number of each aminoacid in protein sequence
 
 ### Examples
 ```python
-run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'GSCKRGPRT', 'compute_length') # [10, 18, 9]
-run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'GSCKRGPRT', 'compute_molecular_weight') # [1055.496, 1886.872, 942.482]
-run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'GSCKRGPRT', 'compute_hydrophobicity') # [50.0, 27.778, 11.111]
-run_protein_tools('AUGGAUCAUcAAUAA', 'MDKL*', 'check_mutations') #'Mutations:K3, L4.'
+run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'compute_length')
+#[('MAEGEITNLP', 10), ('tGQYLAMDTSgLLYGSQT', 18)]
+
+run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'compute_molecular_weight')
+#[('MAEGEITNLP', 1055.496), ('tGQYLAMDTSgLLYGSQT', 1886.872)]
+
+run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'compute_hydrophobicity')
+#[('MAEGEITNLP', 50.0), ('tGQYLAMDTSgLLYGSQT', 27.778)]
+
+run_protein_tools('AUGGAUCAUcAAUAA', 'MDKL*', 'check_mutations')
+#'Mutations: K3, L4.'
+
+run_protein_tools('MAEGLP', 'LYGSQT','protein_to_nucleic_acid')
+#['ATG GCT/GCC/GCA/GCG GAA/GAG GGT/GGC/GGA/GGG TTA/TTG/CTT/CTC/CTA/CTG CCT/CCC/CCA/CCG',
+#'TTA/TTG/CTT/CTC/CTA/CTG TAT/TAC GGT/GGC/GGA/GGG TCT/TCC/TCA/TCG/AGT/AGC CAA/CAG ACT/ACC/ACA/ACG']
+
+run_protein_tools('MAEGLP', 'LYGSQT','count_amino_acids')
+#[{'M': 1, 'A': 1, 'E': 1, 'G': 1, 'L': 1, 'P': 1},
+#{'L': 1, 'Y': 1, 'G': 1, 'S': 1, 'Q': 1, 'T': 1}]
 ```
    
 ### Additional information
 - The program works **only** with protein and RNA sequences. If any of the entered sequences contain inappropriate characters or cannot exist, the program will display an error. Sequences can contain characters of any case.
 
 ```python
-run_protein_tools('PROTEIN', 'compute_molecular_weight') # ValueError: Invalid protein sequence
-run_protein_tools('AUGGAU_AUcAAUAA', 'MDKL*', 'check_mutations')# ValueError: Invalid RNA sequence
+run_protein_tools('PROTEIN', 'compute_molecular_weight') #ValueError: Invalid protein sequence
+run_protein_tools('AUGGAU_AUcAAUAA', 'MDKL*', 'check_mutations') #ValueError: Invalid RNA sequence
+```
+- For the procedure `check_mutations` there are extra requirements for RNA and protein sequences: mRNA sequences must contain a **start-codon** and **one of the stop-codons**; protein sequences must start with **"M"** and end with **"*"** (stop-codon).
+```python
+run_protein_tools("AUGGUAGGGAAAUUUUGA", "MGGKF", 'check_mutations') #ValueError: Stop (*) is absent
+run_protein_tools("AUGGUAGGGAAAUUUUGA", "GGKF*", 'check_mutations') #ValueError: Start (M) is absent
 ```
-
 ### Contacts
 Please use contacts below to reach out with any comments, concerns, or discussions regarding **protein_tools.py.** <br>
 - Artyom Toropov ([@artyomtorr](https://github.com/artyomtorr/)) <br>
@@ -41,6 +61,6 @@ Please use contacts below to reach out with any comments, concerns, or discussio
 
 
 *Author contributions:* <br> 
-Artyom Toropov (teamlead): functions `is_protein`, `compute_molecular_weight`, `run_protein_tools` <br> 
-Sofiya Vinogradova: functions ..., <br> 
-Nikita Zherko: functions `compute_hydrophobicity`, `check_mutations`
+Artyom Toropov (teamlead): functions `is_protein`, `is_rna`, `compute_molecular_weight`, `run_protein_tools` <br> 
+Sofiya Vinogradova: functions `compute_length`, `count_amino_acids`, `protein_to_nucleic_acid` <br> 
+Nikita Zherko: functions `compute_hydrophobicity`, `translate_rna`, `check_mutations`

From 7acecfbcabe59a712dbf26fdf1e8929940703388 Mon Sep 17 00:00:00 2001
From: sofiyaga <sofiyaga@icloud.com>
Date: Sun, 1 Oct 2023 10:54:36 +0300
Subject: [PATCH 19/25] Update file from HW4_Toropov branch

---
 protein_tools.py | 385 ++++++++++++++++++++++++++++-------------------
 1 file changed, 232 insertions(+), 153 deletions(-)

diff --git a/protein_tools.py b/protein_tools.py
index e0fb41b..5dc669a 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -1,153 +1,232 @@
-codon_table = {
-        'A': ['GCT', 'GCC', 'GCA', 'GCG'],
-        'C': ['TGT', 'TGC'],
-        'D': ['GAT', 'GAC'],
-        'E': ['GAA', 'GAG'],
-        'F': ['TTT', 'TTC'],
-        'G': ['GGT', 'GGC', 'GGA', 'GGG'],
-        'H': ['CAT', 'CAC'],
-        'I': ['ATT', 'ATC', 'ATA'],
-        'K': ['AAA', 'AAG'],
-        'L': ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
-        'M': ['ATG'],
-        'N': ['AAT', 'AAC'],
-        'P': ['CCT', 'CCC', 'CCA', 'CCG'],
-        'Q': ['CAA', 'CAG'],
-        'R': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
-        'S': ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'],
-        'T': ['ACT', 'ACC', 'ACA', 'ACG'],
-        'V': ['GTT', 'GTC', 'GTA', 'GTG'],
-        'W': ['TGG'],
-        'Y': ['TAT', 'TAC']}
-
-
-
-def compute_length(*seqs: str):
-    """
-    Compute the length of the input amino acid sequence
-    
-    """
-    lens = []
-    for seq in seqs:
-        if is_protein(seq):
-            lens.append(len(seq))
-        else:
-            raise ValueError('Not a protein')
-    return lens if len(lens) > 1 else lens[0]    
-
-
-def level_of_hydrophobic(protein):
-    gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
-
-    count_of_gydrophobic = 0
-    if is_protein(protein):
-        for i in range(len(protein)):
-            if protein[i] in gydrophobic_aminoacids:
-                count_of_gydrophobic += 1
-
-    percentage = count_of_gydrophobic / len(protein) * 100
-
-    return f"Percentage of gydrophobic aminoacids in {protein} = {percentage}%."
-
-
-def translation(seq):
-    """
-    """
-    gene_code = {
-        "F": ["UUC", "UUU"], "L": ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"],
-        "I": ["AUU", "AUC", "AUA"], "M": ["AUG"], "V": ["GUU", "GUC", "GUA", "GUG"],
-        "S": ["UCU", "UCC", "UCA", "UCG"], "P": ["CCU", "CCC", "CCA", "CCG"],
-        "T": ["ACU", "ACC", "ACA", "ACG"], "A": ["GCU", "GCC", "GCA", "GCG"],
-        "Y": ["UAC", "UAU"], "*": ["UAA", "UAG", "UGA"], "H": ["CAU", "CAC"],
-        "Q": ["CAA", "CAG"], "N": ["AAU", "AAC"],
-        "K": ["AAA", "AAG"], "D": ["GAU", "GAC"], "E": ["GAA", "GAG"],
-        "C": ["UGU", "UGC"], "W": ["UGG"], "R": ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"],
-        "S": ["AGU", "AGC"], "G": ["GGU", "GGC", "GGA", "GGG"]
-    }
-    triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
-    protein = []
-    for triplet in triplets:
-        for aminoacid in gene_code.keys():
-            if triplet in gene_code[aminoacid]:
-                protein.append(aminoacid)
-
-    if is_protein("".join(protein)):
-        start = protein.index("M")
-        stop = protein.index("*")
-        return "".join(protein[start:stop + 1])
-    else:
-        return "This sequence doesn't include the gene."
-
-
-def mutations(seq, protein):
-    correct_protein = translation(seq)
-
-    if is_protein(protein):
-        bank_of_mutations = []
-        for i in range(len(correct_protein)):
-            if correct_protein[i] != protein[i]:
-                bank_of_mutations.append(f'{protein[i]}{i + 1}')
-
-        if len(bank_of_mutations) == 0:
-            return "Protein without mutations."
-        else:
-            return "Mutations:" + ", ".join(bank_of_mutations) + "."
-    else:
-        return "It isn't a protein."
-
-
-alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
-amino_acid_masses = {
-    'A': 71.03711,
-    'R': 156.10111,
-    'N': 114.04293,
-    'D': 115.02694,
-    'C': 103.00919,
-    'Q': 128.05858,
-    'E': 129.04259,
-    'G': 57.02146,
-    'H': 137.05891,
-    'I': 113.08406,
-    'L': 113.08406,
-    'K': 128.09496,
-    'M': 131.04049,
-    'F': 147.06841,
-    'P': 97.05276,
-    'S': 87.03203,
-    'T': 101.04768,
-    'W': 186.07931,
-    'Y': 163.06333,
-    'V': 99.06841
-}
-
-
-def is_protein(seq):
-    unique_chars = set(seq)
-    return unique_chars <= alphabet_protein
-
-
-def molecular_weight(seq):
-    molecular_weight = 0
-    for amino_acid in seq:
-        molecular_weight += amino_acid_masses[amino_acid]
-    return round(molecular_weight, 3)
-
-
-def run_protein_tools(*seqs_and_procedure):
-    procedure = seqs_and_procedure[-1]
-    seqs = seqs_and_procedure[:-1]
-
-    results = []
-
-    for seq in seqs:
-        seq = seq.upper()
-        if is_protein(seq) is not True:
-            raise ValueError("Invalid alphabet")
-        if procedure == 'molecular_weight':
-            results.append(molecular_weight(seq))
-
-    if len(results) == 1:
-        return results[0]
-    else:
-        return results
-      
+alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
+
+alphabet_rna = {'A', 'U', 'G', 'C'}
+
+amino_acid_masses = {
+    'A': 71.03711,
+    'R': 156.10111,
+    'N': 114.04293,
+    'D': 115.02694,
+    'C': 103.00919,
+    'Q': 128.05858,
+    'E': 129.04259,
+    'G': 57.02146,
+    'H': 137.05891,
+    'I': 113.08406,
+    'L': 113.08406,
+    'K': 128.09496,
+    'M': 131.04049,
+    'F': 147.06841,
+    'P': 97.05276,
+    'S': 87.03203,
+    'T': 101.04768,
+    'W': 186.07931,
+    'Y': 163.06333,
+    'V': 99.06841
+}
+
+gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
+
+dna_codons = {
+        'A': ['GCT', 'GCC', 'GCA', 'GCG'],
+        'C': ['TGT', 'TGC'],
+        'D': ['GAT', 'GAC'],
+        'E': ['GAA', 'GAG'],
+        'F': ['TTT', 'TTC'],
+        'G': ['GGT', 'GGC', 'GGA', 'GGG'],
+        'H': ['CAT', 'CAC'],
+        'I': ['ATT', 'ATC', 'ATA'],
+        'K': ['AAA', 'AAG'],
+        'L': ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
+        'M': ['ATG'],
+        'N': ['AAT', 'AAC'],
+        'P': ['CCT', 'CCC', 'CCA', 'CCG'],
+        'Q': ['CAA', 'CAG'],
+        'R': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
+        'S': ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'],
+        'T': ['ACT', 'ACC', 'ACA', 'ACG'],
+        'V': ['GTT', 'GTC', 'GTA', 'GTG'],
+        'W': ['TGG'],
+        'Y': ['TAT', 'TAC'],
+        '*': ["UAA", "UAG", "UGA"]}
+
+rna_codons = {
+        "F": ["UUC", "UUU"], "L": ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"],
+        "I": ["AUU", "AUC", "AUA"], "M": ["AUG"], "V": ["GUU", "GUC", "GUA", "GUG"],
+        "S": ["UCU", "UCC", "UCA", "UCG"], "P": ["CCU", "CCC", "CCA", "CCG"],
+        "T": ["ACU", "ACC", "ACA", "ACG"], "A": ["GCU", "GCC", "GCA", "GCG"],
+        "Y": ["UAC", "UAU"], "*": ["UAA", "UAG", "UGA"], "H": ["CAU", "CAC"],
+        "Q": ["CAA", "CAG"], "N": ["AAU", "AAC"],
+        "K": ["AAA", "AAG"], "D": ["GAU", "GAC"], "E": ["GAA", "GAG"],
+        "C": ["UGU", "UGC"], "W": ["UGG"], "R": ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"],
+        "S": ["AGU", "AGC"], "G": ["GGU", "GGC", "GGA", "GGG"]
+    }
+
+
+def is_protein(seq:str):
+    """
+    Check the existence of a protein sequence, return boolean.
+    """
+    unique_chars = set(seq.upper())
+    return unique_chars <= alphabet_protein
+
+
+def is_rna(seq:str):
+    """
+    Check the existence of a RNA sequence, return boolean.
+    """
+    unique_chars = set(seq.upper())
+    return unique_chars <= alphabet_rna
+
+
+def compute_molecular_weight(seq:str):
+    """
+    Compute molecular weight (g/mol) of protein sequence.
+    """
+    molecular_weight = 0
+    for amino_acid in seq.upper():
+        molecular_weight += amino_acid_masses[amino_acid]
+    return round(molecular_weight, 3)
+
+
+def compute_length(seq:str):
+    """
+    Compute the length of protein sequence.
+    """
+    return len(seq)    
+
+
+def compute_hydrophobicity(protein:str) -> tuple:
+    """
+    Compute the percentage of gydrophobic aminoacids in protein sequence.
+
+    Argument:
+    - protein (str): protein sequence. Include hydrophobic 
+    and hydrophilic aminoacids.
+
+    Return:
+    - tuple, result of computation percentage of gydrophobic aminoacids.
+    """
+    count_of_gydrophobic = 0
+    for i in range(len(protein)):
+        if protein[i] in gydrophobic_aminoacids:
+            count_of_gydrophobic += 1
+
+    percentage = round(count_of_gydrophobic / len(protein) * 100, 3)
+
+    return protein, percentage
+
+
+def translate_rna(seq:str) -> str:
+    """
+    Perform the translation of mRNA seguence into protein sequence.
+
+    Argument:
+    - seq (str): mRNA sequence. Must contain start-codon and one of 
+    the stop-codons.
+
+    Return:
+    - str, protein sequence after translation. 
+    Always starts with "M" and ends with "*".
+    """
+    triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
+    protein = []
+    for triplet in triplets:
+        for aminoacid in rna_codons.keys():
+            if triplet in rna_codons[aminoacid]:
+                protein.append(aminoacid)
+
+    if protein[-1] != "*":
+        raise ValueError("Stop-codon (*) is absent in mRNA")
+    if protein[0] != "M":
+        raise ValueError("Start-codon (M) is absent in mRNA")
+
+    start = protein.index("M")
+    stop = protein.index("*")
+    return "".join(protein[start:stop + 1])
+
+
+def check_mutations(seq:str, protein:str) -> str:
+    """
+    Check mutations in the protein sequence after translation.
+
+    Use additional function "translation(seq)".
+    This function doesn't show mutations, which don't lead to 
+    change aminoacids in protein sequence. 
+
+    Arguments:
+    - seq (str): translation sequence of mRNA with/without mutations
+    - protein (str): protein for comparison with protein after translation.
+    Every protein starts with "M" and ends with "*" (stop-codon). 
+    Remark: is_protein(seq) doesn't see "*", but it's used in the other part of function.
+
+    Return:
+    - str, if mRNA without mutations return "Protein without mutations." 
+    If some mutations in protein, return aminoacid(s) and their position(s)
+
+    Examples:
+    - "AUGGUAGGGAAAUUUUGA", "MVGKF*" ->  "Protein without mutations."
+    - "AUGGUAGGGAAAUUUUGA", "MGGVF*" -> "Mutations:G2, V4."
+    - "AUGGUAGGGAAAUUUUGA", "MGGKF" –> "ValueError: Stop (*) is absent"
+    - "AUGGUAGGGAAAUUUUGA", "GGKF*" –> "ValueError: Start (M) is absent"
+    - "AUGAAAAAAUGA", "MK*" -> "ValueError: Different length of translated protein and protein"
+    """
+
+    correct_protein = translation(seq)
+    bank_of_mutations = []
+    
+    if is_protein(protein[:-1]) is not True:
+        raise ValueError("Invalid protein sequence")
+    if is_rna(seq) is not True:
+        raise ValueError("Invalid RNA sequence")
+    if protein[-1] != "*":
+        raise ValueError("Stop (*) is absent")
+    if protein[0] != "M":
+        raise ValueError("Start (M) is absent")
+    if len(protein) != len(seq)/3:
+        raise ValueError("Different length of translated protein and protein")
+    
+    for i in range(len(correct_protein)):
+        if correct_protein[i] != protein[i]:
+            bank_of_mutations.append(f'{protein[i]}{i + 1}')
+
+    if len(bank_of_mutations) == 0:
+        return "Protein without mutations."
+    else:
+        return "Mutations:" + ", ".join(bank_of_mutations) + "."
+
+
+def run_protein_tools(*args:str):
+    """
+    Function containing methods for protein analysis.
+    
+    Takes arbitrary number of arguments with protein sequencies
+    and the name of the procedure to be performed (always the 
+    last argument). Returns the result of the procedure as string 
+    if one sequnce is submitted or list if several.
+
+    If procedure 'check_mutations' is used then input must be only three
+    arguments: RNA sequence, protein sequence and the name of procedure 
+    itself.
+    """
+    *seqs, procedure = args
+    results = []
+    d_of_functions = {'compute_molecular_weight': compute_molecular_weight, 
+                  'compute_length': compute_length,
+                  'compute_hydrophobicity': compute_hydrophobicity,
+                 }
+    if procedure == 'check_mutations':
+        results.append(check_mutations(seqs[0], seqs[1]))
+    else:
+        for seq in seqs:
+            if is_protein(seq) is not True:
+                raise ValueError("Invalid protein sequence")
+            if procedure not in d_of_functions:
+                raise ValueError("Wrong procedure name")
+            else:
+                results.append(d_of_functions[procedure](seq))
+    if len(results) == 1:
+        return results[0]
+    else:
+        return results
+        

From a2022bc6de1e1bafe51b9522c7ab089a72507c1f Mon Sep 17 00:00:00 2001
From: sofiyaga <sofiyaga@icloud.com>
Date: Sun, 1 Oct 2023 10:57:18 +0300
Subject: [PATCH 20/25] Add docstrings and typing in function compute_length

---
 protein_tools.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/protein_tools.py b/protein_tools.py
index 5dc669a..4e131ec 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -89,11 +89,21 @@ def compute_molecular_weight(seq:str):
     return round(molecular_weight, 3)
 
 
-def compute_length(seq:str):
+def compute_length(protein: str) -> int:
     """
-    Compute the length of protein sequence.
+    Compute the length of the input protein sequence.
+    
+     Argument:
+    - protein (str): protein sequence.
+
+    Return:
+    - string, length of the input protein sequence.
+    
+    Example:
+    
+    'MGHIKCE' -> 7
     """
-    return len(seq)    
+    return len(protein)  
 
 
 def compute_hydrophobicity(protein:str) -> tuple:

From b89318081fc287652beb7adc795d199c921f8b2c Mon Sep 17 00:00:00 2001
From: sofiyaga <sofiyaga@icloud.com>
Date: Sun, 1 Oct 2023 10:59:32 +0300
Subject: [PATCH 21/25] Add function protein_to_dna

---
 protein_tools.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/protein_tools.py b/protein_tools.py
index 4e131ec..fc6e23d 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -103,7 +103,36 @@ def compute_length(protein: str) -> int:
     
     'MGHIKCE' -> 7
     """
-    return len(protein)  
+    return len(protein)
+
+def protein_to_dna(protein: str) -> str:
+    
+    """
+    Returns possible variants of DNAs for a given protein sequence.
+    
+    Argument:
+    - protein (str): protein sequence.
+
+    Return:
+    - string, variants of nucleic acids. 
+    If several codons correspond to a given amino acid they are displayed with a '/'.
+    
+    Does not distinguish between lowercase and uppercase letters.
+    
+    Examples:
+    
+    -'MACDRS' -> 'ATG GCT/GCC/GCA/GCG TGT/TGC GAT/GAC CGT/CGC/CGA/CGG/AGA/AGG TCT/TCC/TCA/TCG/AGT/AGC'
+    -'MaCdrS' -> 'ATG GCT/GCC/GCA/GCG TGT/TGC GAT/GAC CGT/CGC/CGA/CGG/AGA/AGG TCT/TCC/TCA/TCG/AGT/AGC'
+    
+    """
+    nucleic_acid_seq = ''
+    
+    for aa in protein.upper():
+        codons = dna_codons.get(aa)
+        nucleic_acid_seq += '/'.join(codons) + ' '
+    
+            
+    return nucleic_acid_seq.replace(' ', '', -1)  
 
 
 def compute_hydrophobicity(protein:str) -> tuple:

From 3742858a22ce2314df332a72da9c3169835a85e1 Mon Sep 17 00:00:00 2001
From: sofiyaga <sofiyaga@icloud.com>
Date: Sun, 1 Oct 2023 11:00:05 +0300
Subject: [PATCH 22/25] def count_amino_acids(protein: str) -> dict:

---
 protein_tools.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/protein_tools.py b/protein_tools.py
index fc6e23d..5b88f98 100644
--- a/protein_tools.py
+++ b/protein_tools.py
@@ -134,6 +134,33 @@ def protein_to_dna(protein: str) -> str:
             
     return nucleic_acid_seq.replace(' ', '', -1)  
 
+def count_amino_acids(protein: str) -> dict:
+    
+    """
+    Calculates the number of each aminoacid in a given protein sequence.
+    
+    Argument:
+    - protein (str): protein sequence.
+
+    Return:
+    - dictionary, where a key is the aminoacid letter and value is number of this aminoacid. 
+    
+    Does not distinguish between lowercase and uppercase letters.
+
+    Examples:
+    
+    -'MACDRS' -> {'M': 1, 'A': 1, 'C': 1, 'D': 1, 'R': 1, 'S': 1}
+    -'MaCdrS' -> {'M': 1, 'A': 1, 'C': 1, 'D': 1, 'R': 1, 'S': 1}
+    
+    """
+    
+    amino_acids_dict = {}
+    for aa in protein.upper():
+        if aa in amino_acids_dict:
+            amino_acids_dict[aa] += 1
+        else:
+            amino_acids_dict[aa] = 1
+    return amino_acids_dict
 
 def compute_hydrophobicity(protein:str) -> tuple:
     """

From 161edf180f250ca3aaf30d84adc494b1ec681dcb Mon Sep 17 00:00:00 2001
From: Artem Toropov <144557024+artyomtorr@users.noreply.github.com>
Date: Sun, 1 Oct 2023 10:04:32 +0100
Subject: [PATCH 23/25] Fix typos in README.md

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index f8b086e..734790f 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ The tool works by calling the function `run_protein_tools`, which takes arbitrar
 - `compute_length` — computes the number of amino acids in protein sequence
 - `compute_hydrophobicity` — computes the percentage of gydrophobic aminoacids in protein sequence
 - `check_mutations` — checks missense mutations in the protein sequence after translation
-- `protein_to_nucleic_acid`- returns possible variants of DNAs for a given protein sequence
+- `protein_to_dna`- returns possible variants of DNAs for a given protein sequence
 - `count_amino_acids` - calculates the number of each aminoacid in protein sequence
 
 ### Examples
@@ -31,7 +31,7 @@ run_protein_tools('MAEGEITNLP', 'tGQYLAMDTSgLLYGSQT', 'compute_hydrophobicity')
 run_protein_tools('AUGGAUCAUcAAUAA', 'MDKL*', 'check_mutations')
 #'Mutations: K3, L4.'
 
-run_protein_tools('MAEGLP', 'LYGSQT','protein_to_nucleic_acid')
+run_protein_tools('MAEGLP', 'LYGSQT','protein_to_dna')
 #['ATG GCT/GCC/GCA/GCG GAA/GAG GGT/GGC/GGA/GGG TTA/TTG/CTT/CTC/CTA/CTG CCT/CCC/CCA/CCG',
 #'TTA/TTG/CTT/CTC/CTA/CTG TAT/TAC GGT/GGC/GGA/GGG TCT/TCC/TCA/TCG/AGT/AGC CAA/CAG ACT/ACC/ACA/ACG']
 
@@ -62,5 +62,5 @@ Please use contacts below to reach out with any comments, concerns, or discussio
 
 *Author contributions:* <br> 
 Artyom Toropov (teamlead): functions `is_protein`, `is_rna`, `compute_molecular_weight`, `run_protein_tools` <br> 
-Sofiya Vinogradova: functions `compute_length`, `count_amino_acids`, `protein_to_nucleic_acid` <br> 
+Sofiya Vinogradova: functions `compute_length`, `count_amino_acids`, `protein_to_dna` <br> 
 Nikita Zherko: functions `compute_hydrophobicity`, `translate_rna`, `check_mutations`

From c3b14775483432ab2ee509e539b4f14b027be163 Mon Sep 17 00:00:00 2001
From: Artyom <toropov.01@bk.ru>
Date: Sun, 1 Oct 2023 13:18:06 +0100
Subject: [PATCH 24/25] Fix run_protein_tools function and format PEP8

---
 HW4_Toropov/protein_tools.py | 390 ++++++++++++++++++++++++++++++-----
 1 file changed, 336 insertions(+), 54 deletions(-)

diff --git a/HW4_Toropov/protein_tools.py b/HW4_Toropov/protein_tools.py
index 87bff7b..50b670f 100644
--- a/HW4_Toropov/protein_tools.py
+++ b/HW4_Toropov/protein_tools.py
@@ -1,54 +1,336 @@
-alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
-amino_acid_masses = {
-    'A': 71.03711,
-    'R': 156.10111,
-    'N': 114.04293,
-    'D': 115.02694,
-    'C': 103.00919,
-    'Q': 128.05858,
-    'E': 129.04259,
-    'G': 57.02146,
-    'H': 137.05891,
-    'I': 113.08406,
-    'L': 113.08406,
-    'K': 128.09496,
-    'M': 131.04049,
-    'F': 147.06841,
-    'P': 97.05276,
-    'S': 87.03203,
-    'T': 101.04768,
-    'W': 186.07931,
-    'Y': 163.06333,
-    'V': 99.06841
-}
-
-
-def is_protein(seq):
-    unique_chars = set(seq)
-    return unique_chars <= alphabet_protein
-
-
-def molecular_weight(seq):
-    molecular_weight = 0
-    for amino_acid in seq:
-        molecular_weight += amino_acid_masses[amino_acid]
-    return round(molecular_weight, 3)
-
-
-def run_protein_tools(*seqs_and_procedure):
-    procedure = seqs_and_procedure[-1]
-    seqs = seqs_and_procedure[:-1]
-
-    results = []
-
-    for seq in seqs:
-        seq = seq.upper()
-        if is_protein(seq) is not True:
-            raise ValueError("Invalid alphabet")
-        if procedure == 'molecular_weight':
-            results.append(molecular_weight(seq))
-
-    if len(results) == 1:
-        return results[0]
-    else:
-        return results
+alphabet_protein = {
+    "A",
+    "C",
+    "D",
+    "E",
+    "F",
+    "G",
+    "H",
+    "I",
+    "K",
+    "L",
+    "M",
+    "N",
+    "P",
+    "Q",
+    "R",
+    "S",
+    "T",
+    "V",
+    "W",
+    "Y",
+}
+
+alphabet_rna = {"A", "U", "G", "C"}
+
+amino_acid_masses = {
+    "A": 71.03711,
+    "R": 156.10111,
+    "N": 114.04293,
+    "D": 115.02694,
+    "C": 103.00919,
+    "Q": 128.05858,
+    "E": 129.04259,
+    "G": 57.02146,
+    "H": 137.05891,
+    "I": 113.08406,
+    "L": 113.08406,
+    "K": 128.09496,
+    "M": 131.04049,
+    "F": 147.06841,
+    "P": 97.05276,
+    "S": 87.03203,
+    "T": 101.04768,
+    "W": 186.07931,
+    "Y": 163.06333,
+    "V": 99.06841,
+}
+
+gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
+
+dna_codons = {
+    "A": ["GCT", "GCC", "GCA", "GCG"],
+    "C": ["TGT", "TGC"],
+    "D": ["GAT", "GAC"],
+    "E": ["GAA", "GAG"],
+    "F": ["TTT", "TTC"],
+    "G": ["GGT", "GGC", "GGA", "GGG"],
+    "H": ["CAT", "CAC"],
+    "I": ["ATT", "ATC", "ATA"],
+    "K": ["AAA", "AAG"],
+    "L": ["TTA", "TTG", "CTT", "CTC", "CTA", "CTG"],
+    "M": ["ATG"],
+    "N": ["AAT", "AAC"],
+    "P": ["CCT", "CCC", "CCA", "CCG"],
+    "Q": ["CAA", "CAG"],
+    "R": ["CGT", "CGC", "CGA", "CGG", "AGA", "AGG"],
+    "S": ["TCT", "TCC", "TCA", "TCG", "AGT", "AGC"],
+    "T": ["ACT", "ACC", "ACA", "ACG"],
+    "V": ["GTT", "GTC", "GTA", "GTG"],
+    "W": ["TGG"],
+    "Y": ["TAT", "TAC"],
+    "*": ["TAA", "TAG", "TGA"],  # DNA stop codons: T, not the RNA letter U
+}
+
+rna_codons = {
+    "F": ["UUC", "UUU"],
+    "L": ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"],
+    "I": ["AUU", "AUC", "AUA"],
+    "M": ["AUG"],
+    "V": ["GUU", "GUC", "GUA", "GUG"],
+    # All six serine codons live under ONE key: a repeated "S" key overwrites.
+    "S": ["UCU", "UCC", "UCA", "UCG", "AGU", "AGC"],
+    "P": ["CCU", "CCC", "CCA", "CCG"],
+    "T": ["ACU", "ACC", "ACA", "ACG"],
+    "A": ["GCU", "GCC", "GCA", "GCG"],
+    "Y": ["UAC", "UAU"],
+    "*": ["UAA", "UAG", "UGA"],
+    "H": ["CAU", "CAC"],
+    "Q": ["CAA", "CAG"],
+    "N": ["AAU", "AAC"],
+    "K": ["AAA", "AAG"],
+    "D": ["GAU", "GAC"],
+    "E": ["GAA", "GAG"],
+    "C": ["UGU", "UGC"],
+    "W": ["UGG"],
+    "R": ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"],
+    "G": ["GGU", "GGC", "GGA", "GGG"],
+}
+
+
+def is_protein(seq: str):
+    """
+    Tell whether seq consists only of letters from alphabet_protein.
+    Letter case is ignored; an empty string is accepted. Returns bool.
+    """
+    return alphabet_protein.issuperset(seq.upper())
+
+
+def is_rna(seq: str):
+    """
+    Tell whether seq consists only of letters from alphabet_rna.
+    Letter case is ignored; an empty string is accepted. Returns bool.
+    """
+    return alphabet_rna.issuperset(seq.upper())
+
+
+def compute_molecular_weight(protein: str) -> tuple:
+    """
+    Sum the residue masses (g/mol) of a protein sequence.
+
+    Argument:
+    - protein (str): protein sequence, letter case is ignored.
+
+    Return:
+    - tuple of the sequence as given and the sum of the
+    amino_acid_masses values of its residues, rounded to 3 places.
+
+    A letter missing from amino_acid_masses raises KeyError.
+    """
+    total_mass = sum(amino_acid_masses[residue] for residue in protein.upper())
+    return protein, round(total_mass, 3)
+
+
+def compute_length(protein: str) -> tuple:
+    """
+    Compute the length of the input protein sequence.
+
+    Argument:
+    - protein (str): protein sequence.
+
+    Return:
+    - tuple with the protein sequence as given and its length (int).
+    """
+    return protein, len(protein)
+
+
+def protein_to_dna(protein: str) -> str:
+    """
+    List the DNA codons that can encode each residue of a protein.
+
+    Argument:
+    - protein (str): protein sequence, letter case is ignored.
+
+    Return:
+    - string with one group of codons per amino acid, in sequence order.
+    Groups are separated by a space; alternative codons inside a group
+    are separated by '/'.
+
+    A letter that is absent from dna_codons raises TypeError.
+
+    Examples:
+
+    -'MACDRS' -> 'ATG GCT/GCC/GCA/GCG TGT/TGC GAT/GAC CGT/CGC/CGA/CGG/AGA/AGG TCT/TCC/TCA/TCG/AGT/AGC'
+    -'MaCdrS' -> 'ATG GCT/GCC/GCA/GCG TGT/TGC GAT/GAC CGT/CGC/CGA/CGG/AGA/AGG TCT/TCC/TCA/TCG/AGT/AGC'
+    """
+    codon_groups = [
+        "/".join(dna_codons.get(residue)) for residue in protein.upper()
+    ]
+
+    # Joining with " " yields the same text as appending "<group> " and trimming.
+    return " ".join(codon_groups)
+
+
+def count_amino_acids(protein: str) -> dict:
+    """
+    Count how many times each amino acid occurs in a protein sequence.
+
+    Argument:
+    - protein (str): protein sequence, letter case is ignored.
+
+    Return:
+    - dictionary mapping each uppercase letter found in the sequence
+    to its number of occurrences, in order of first appearance.
+    An empty sequence gives an empty dictionary.
+
+    No validation is performed: every character of the input is counted.
+
+    Examples:
+
+    -'MACDRS' -> {'M': 1, 'A': 1, 'C': 1, 'D': 1, 'R': 1, 'S': 1}
+    -'MaCdrS' -> {'M': 1, 'A': 1, 'C': 1, 'D': 1, 'R': 1, 'S': 1}
+    """
+    tally = {}
+    for residue in protein.upper():
+        # dict.get supplies 0 for a letter that is seen for the first time.
+        tally[residue] = tally.get(residue, 0) + 1
+    return tally
+
+
+def compute_hydrophobicity(protein: str) -> tuple:
+    """
+    Compute the percentage of hydrophobic amino acids in a protein sequence.
+
+    Argument:
+    - protein (str): protein sequence; letter case is ignored.
+
+    Return:
+    - tuple with the protein sequence as given and the percentage of
+    hydrophobic amino acids (float rounded to 3 decimal places).
+
+    Raises:
+    - ValueError if the sequence is empty.
+    """
+    if not protein:
+        raise ValueError("Invalid protein sequence")
+    count_of_gydrophobic = sum(
+        aa in gydrophobic_aminoacids for aa in protein.upper()
+    )
+    return protein, round(count_of_gydrophobic / len(protein) * 100, 3)
+
+
+def translate_rna(rna: str) -> str:
+    """
+    Perform the translation of mRNA sequence into protein sequence.
+
+    Argument:
+    - rna (str): mRNA sequence, read in triplets from its first letter.
+    The first translated triplet must be the start-codon and the last
+    one a stop-codon. Triplets absent from rna_codons are skipped.
+
+    Return:
+    - str, protein sequence from "M" up to the first "*" inclusive.
+
+    Raises:
+    - ValueError if the translation lacks a leading "M" or a trailing "*".
+    """
+    triplets = [rna[i : i + 3].upper() for i in range(0, len(rna), 3)]
+    protein = []
+    for triplet in triplets:
+        for aminoacid, codons in rna_codons.items():
+            if triplet in codons:
+                protein.append(aminoacid)
+    # Empty translation: raise ValueError here rather than IndexError below.
+    if not protein or protein[-1] != "*":
+        raise ValueError("Stop-codon (*) is absent in mRNA")
+    if protein[0] != "M":
+        raise ValueError("Start-codon (M) is absent in mRNA")
+    return "".join(protein[: protein.index("*") + 1])
+
+
+def check_mutations(rna: str, protein: str) -> str:
+    """
+    Check missense mutations in the protein sequence after translation.
+
+    Arguments:
+    - rna (str): sequence of mRNA with/without mutations.
+    Must contain start-codon and one of the stop-codons.
+    - protein (str): protein sequence to compare with the translated mRNA.
+    Must start with "M" and end with "*" (stop-codon); case is ignored.
+
+    Note: is_protein() doesn't accept "*", so it is applied to protein[:-1].
+
+    Return:
+    - str, "Protein without mutations." if the translation matches protein,
+    otherwise the differing aminoacid(s) of protein and their position(s).
+
+    Raises:
+    - ValueError: invalid sequence, absent start/stop or different lengths
+
+    Examples:
+    - "AUGGUAGGGAAAUUUUGA", "MVGKF*" ->  "Protein without mutations."
+    - "AUGGUAGGGAAAUUUUGA", "MGGVF*" -> "Mutations: G2, V4."
+    - "AUGGUAGGGAAAUUUUGA", "MGGKF" –> "ValueError: Stop (*) is absent"
+    - "AUGGUAGGGAAAUUUUGA", "GGKF*" –> "ValueError: Start (M) is absent"
+    - "AUGAAAAAAUGA", "MK*" -> "ValueError: Different length of translated protein and protein"
+    """
+    protein = protein.upper()  # translate_rna() yields uppercase letters
+    correct_protein = translate_rna(rna)
+    if is_protein(protein[:-1]) is not True:
+        raise ValueError("Invalid protein sequence")
+    if is_rna(rna) is not True:
+        raise ValueError("Invalid RNA sequence")
+    if not protein.endswith("*"):
+        raise ValueError("Stop (*) is absent")
+    if not protein.startswith("M"):
+        raise ValueError("Start (M) is absent")
+    if len(protein) * 3 != len(rna):
+        raise ValueError("Different length of translated protein and protein")
+
+    bank_of_mutations = [
+        f"{given}{position}"
+        for position, (expected, given) in enumerate(zip(correct_protein, protein), 1)
+        if expected != given
+    ]
+    if not bank_of_mutations:
+        return "Protein without mutations."
+    return "Mutations: " + ", ".join(bank_of_mutations) + "."
+
+
+def run_protein_tools(*args: str):
+    """
+    Function containing methods for protein analysis.
+
+    Takes arbitrary number of arguments with protein sequences
+    and the name of the procedure to be performed (always the last
+    argument). Returns the result of the procedure as string, tuple
+    or dictionary if one sequence is submitted or list if several.
+
+    Note: if procedure 'check_mutations' is used then input must
+    contain only three arguments: RNA sequence, protein sequence
+    and the name of procedure itself.
+
+    Raises ValueError for an unknown procedure name, a wrong number
+    of arguments or an invalid sequence.
+    """
+    *seqs, procedure = args
+    d_of_functions = {
+        "compute_molecular_weight": compute_molecular_weight,
+        "compute_length": compute_length,
+        "compute_hydrophobicity": compute_hydrophobicity,
+        "count_amino_acids": count_amino_acids,
+        "protein_to_dna": protein_to_dna,
+    }
+    # The procedure is validated once, before any sequence is looked at.
+    if procedure == "check_mutations":
+        if len(seqs) != 2:
+            raise ValueError("check_mutations takes one RNA and one protein sequence")
+        return check_mutations(*seqs)
+    if procedure not in d_of_functions:
+        raise ValueError("Wrong procedure name")
+    results = []
+    for seq in seqs:
+        if is_protein(seq) is not True:
+            raise ValueError("Invalid protein sequence")
+        results.append(d_of_functions[procedure](seq))
+    return results[0] if len(results) == 1 else results

From b2a9f556b1e2f8cee0fee5a38dab1b2dd486a780 Mon Sep 17 00:00:00 2001
From: Artem Toropov <144557024+artyomtorr@users.noreply.github.com>
Date: Sun, 1 Oct 2023 10:23:39 +0100
Subject: [PATCH 25/25] Delete extra protein_tools.py

---
 protein_tools.py | 301 -----------------------------------------------
 1 file changed, 301 deletions(-)
 delete mode 100644 protein_tools.py

diff --git a/protein_tools.py b/protein_tools.py
deleted file mode 100644
index 2b1f826..0000000
--- a/protein_tools.py
+++ /dev/null
@@ -1,301 +0,0 @@
-alphabet_protein = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}
-
-alphabet_rna = {'A', 'U', 'G', 'C'}
-
-amino_acid_masses = {
-    'A': 71.03711,
-    'R': 156.10111,
-    'N': 114.04293,
-    'D': 115.02694,
-    'C': 103.00919,
-    'Q': 128.05858,
-    'E': 129.04259,
-    'G': 57.02146,
-    'H': 137.05891,
-    'I': 113.08406,
-    'L': 113.08406,
-    'K': 128.09496,
-    'M': 131.04049,
-    'F': 147.06841,
-    'P': 97.05276,
-    'S': 87.03203,
-    'T': 101.04768,
-    'W': 186.07931,
-    'Y': 163.06333,
-    'V': 99.06841
-}
-
-gydrophobic_aminoacids = {"A", "V", "L", "I", "P", "F", "W", "M"}
-
-dna_codons = {
-        'A': ['GCT', 'GCC', 'GCA', 'GCG'],
-        'C': ['TGT', 'TGC'],
-        'D': ['GAT', 'GAC'],
-        'E': ['GAA', 'GAG'],
-        'F': ['TTT', 'TTC'],
-        'G': ['GGT', 'GGC', 'GGA', 'GGG'],
-        'H': ['CAT', 'CAC'],
-        'I': ['ATT', 'ATC', 'ATA'],
-        'K': ['AAA', 'AAG'],
-        'L': ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
-        'M': ['ATG'],
-        'N': ['AAT', 'AAC'],
-        'P': ['CCT', 'CCC', 'CCA', 'CCG'],
-        'Q': ['CAA', 'CAG'],
-        'R': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
-        'S': ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'],
-        'T': ['ACT', 'ACC', 'ACA', 'ACG'],
-        'V': ['GTT', 'GTC', 'GTA', 'GTG'],
-        'W': ['TGG'],
-        'Y': ['TAT', 'TAC'],
-        '*': ["UAA", "UAG", "UGA"]}
-
-rna_codons = {
-        "F": ["UUC", "UUU"], "L": ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"],
-        "I": ["AUU", "AUC", "AUA"], "M": ["AUG"], "V": ["GUU", "GUC", "GUA", "GUG"],
-        "S": ["UCU", "UCC", "UCA", "UCG"], "P": ["CCU", "CCC", "CCA", "CCG"],
-        "T": ["ACU", "ACC", "ACA", "ACG"], "A": ["GCU", "GCC", "GCA", "GCG"],
-        "Y": ["UAC", "UAU"], "*": ["UAA", "UAG", "UGA"], "H": ["CAU", "CAC"],
-        "Q": ["CAA", "CAG"], "N": ["AAU", "AAC"],
-        "K": ["AAA", "AAG"], "D": ["GAU", "GAC"], "E": ["GAA", "GAG"],
-        "C": ["UGU", "UGC"], "W": ["UGG"], "R": ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"],
-        "S": ["AGU", "AGC"], "G": ["GGU", "GGC", "GGA", "GGG"]
-    }
-
-
-def is_protein(seq:str):
-    """
-    Check the existence of a protein sequence, return boolean.
-    """
-    unique_chars = set(seq.upper())
-    return unique_chars <= alphabet_protein
-
-
-def is_rna(seq:str):
-    """
-    Check the existence of a RNA sequence, return boolean.
-    """
-    unique_chars = set(seq.upper())
-    return unique_chars <= alphabet_rna
-
-
-def compute_molecular_weight(seq:str) -> tuple:
-    """
-    Compute molecular weight (g/mol) of protein sequence.
-    
-    Argument:
-    - protein (str): protein sequence.
-    
-    Return: 
-    - tuple with protein sequence and computed molecular 
-    weight (float rounded to 3 decimal places).
-    """
-    molecular_weight = 0
-    for amino_acid in seq.upper():
-        molecular_weight += amino_acid_masses[amino_acid]
-    return seq, round(molecular_weight, 3)
-
-
-def compute_length(protein: str) -> tuple:
-    """
-    Compute the length of the input protein sequence.
-    
-     Argument:
-    - protein (str): protein sequence.
-    
-    Return: 
-    - tuple with protein sequence and computed length.
-    """
-    return seq, len(seq)  
-
-def protein_to_dna(protein: str) -> str:
-    
-    """
-    Returns possible variants of DNAs for a given protein sequence.
-    
-    Argument:
-    - protein (str): protein sequence.
-
-    Return:
-    - string, variants of nucleic acids. 
-    If several codons correspond to a given amino acid they are displayed with a '/'.
-    
-    Does not distinguish between lowercase and uppercase letters.
-    
-    Examples:
-    
-    -'MACDRS' -> 'ATG GCT/GCC/GCA/GCG TGT/TGC GAT/GAC CGT/CGC/CGA/CGG/AGA/AGG TCT/TCC/TCA/TCG/AGT/AGC'
-    -'MaCdrS' -> 'ATG GCT/GCC/GCA/GCG TGT/TGC GAT/GAC CGT/CGC/CGA/CGG/AGA/AGG TCT/TCC/TCA/TCG/AGT/AGC'
-    
-    """
-    nucleic_acid_seq = ''
-    
-    for aa in protein.upper():
-        codons = dna_codons.get(aa)
-        nucleic_acid_seq += '/'.join(codons) + ' '
-            
-    return nucleic_acid_seq.replace(' ', '', -1)  
-
-  
-def count_amino_acids(protein: str) -> dict:
-    
-    """
-    Calculates the number of each aminoacid in a given protein sequence.
-    
-    Argument:
-    - protein (str): protein sequence.
-
-    Return:
-    - dictionary, where a key is the aminoacid letter and value is number of this aminoacid. 
-    
-    Does not distinguish between lowercase and uppercase letters.
-
-    Examples:
-    
-    -'MACDRS' -> {'M': 1, 'A': 1, 'C': 1, 'D': 1, 'R': 1, 'S': 1}
-    -'MaCdrS' -> {'M': 1, 'A': 1, 'C': 1, 'D': 1, 'R': 1, 'S': 1}
-    
-    """
-    
-    amino_acids_dict = {}
-    for aa in protein.upper():
-        if aa in amino_acids_dict:
-            amino_acids_dict[aa] += 1
-        else:
-            amino_acids_dict[aa] = 1
-    return amino_acids_dict
-
-
-def compute_hydrophobicity(protein:str) -> tuple:
-    """
-    Compute the percentage of gydrophobic aminoacids in protein sequence.
-
-    Argument:
-    - protein (str): protein sequence. Includes hydrophobic 
-    and hydrophilic aminoacids.
-
-    Return:
-    - tuple with protein sequence and computed percentage 
-    of gydrophobic aminoacids.
-    """
-    count_of_gydrophobic = 0
-    for i in range(len(protein)):
-        if protein[i] in gydrophobic_aminoacids:
-            count_of_gydrophobic += 1
-
-    percentage = round(count_of_gydrophobic / len(protein) * 100, 3)
-
-    return protein, percentage
-
-
-def translate_rna(seq:str) -> str:
-    """
-    Perform the translation of mRNA seguence into protein sequence.
-
-    Argument:
-    - seq (str): mRNA sequence. Must contain start-codon and one of 
-    the stop-codons.
-
-    Return:
-    - str, protein sequence after translation. 
-    Always starts with "M" and ends with "*".
-    """
-    triplets = [seq[i:i + 3].upper() for i in range(0, len(seq), 3)]
-    protein = []
-    for triplet in triplets:
-        for aminoacid in rna_codons.keys():
-            if triplet in rna_codons[aminoacid]:
-                protein.append(aminoacid)
-
-    if protein[-1] != "*":
-        raise ValueError("Stop-codon (*) is absent in mRNA")
-    if protein[0] != "M":
-        raise ValueError("Start-codon (M) is absent in mRNA")
-
-    start = protein.index("M")
-    stop = protein.index("*")
-    return "".join(protein[start:stop + 1])
-
-
-def check_mutations(seq:str, protein:str) -> str:
-    """
-    Check missense mutations in the protein sequence after translation.
-
-    Uses additional function "translate_rna(seq)".
-
-    Arguments:
-    - seq (str): sequence of mRNA with/without mutations.
-    Must contain start-codon and one of the stop-codons.
-    - protein (str): protein sequence translated from mRNA.
-    Must start with "M" and ends with "*" (stop-codon). 
-    
-    Note: is_protein(seq) doesn't see "*", but it's used in the other part of function.
-
-    Return:
-    - str, if mRNA without mutations return "Protein without mutations." 
-    If there are mutations in protein, returns aminoacid(s) and their position(s)
-
-    Examples:
-    - "AUGGUAGGGAAAUUUUGA", "MVGKF*" ->  "Protein without mutations."
-    - "AUGGUAGGGAAAUUUUGA", "MGGVF*" -> "Mutations:G2, V4."
-    - "AUGGUAGGGAAAUUUUGA", "MGGKF" –> "ValueError: Stop (*) is absent"
-    - "AUGGUAGGGAAAUUUUGA", "GGKF*" –> "ValueError: Start (M) is absent"
-    - "AUGAAAAAAUGA", "MK*" -> "ValueError: Different length of translated protein and protein"
-    """
-    correct_protein = translate_rna(seq)
-    bank_of_mutations = []
-    
-    if is_protein(protein[:-1]) is not True:
-        raise ValueError("Invalid protein sequence")
-    if is_rna(seq) is not True:
-        raise ValueError("Invalid RNA sequence")
-    if protein[-1] != "*":
-        raise ValueError("Stop (*) is absent")
-    if protein[0] != "M":
-        raise ValueError("Start (M) is absent")
-    if len(protein) != len(seq)/3:
-        raise ValueError("Different length of translated protein and protein")
-    
-    for i in range(len(correct_protein)):
-        if correct_protein[i] != protein[i]:
-            bank_of_mutations.append(f'{protein[i]}{i + 1}')
-
-    if len(bank_of_mutations) == 0:
-        return "Protein without mutations."
-    else:
-        return "Mutations: " + ", ".join(bank_of_mutations) + "."
-
-
-def run_protein_tools(*args:str):
-    """
-    Function containing methods for protein analysis.
-    
-    Takes arbitrary number of arguments with protein sequencies
-    and the name of the procedure to be performed (always the last 
-    argument). Returns the result of the procedure as string, tuple 
-    or dictionary if one sequnce is submitted or list if several.
-
-    Note: if procedure 'check_mutations' is used then input must 
-    contain only three arguments: RNA sequence, protein sequence 
-    and the name of procedure itself.
-    """
-    *seqs, procedure = args
-    results = []
-    d_of_functions = {'compute_molecular_weight': compute_molecular_weight, 
-                  'compute_length': compute_length,
-                  'compute_hydrophobicity': compute_hydrophobicity,
-                 }
-    if procedure == 'check_mutations':
-        results.append(check_mutations(seqs[0], seqs[1]))
-    else:
-        for seq in seqs:
-            if is_protein(seq) is not True:
-                raise ValueError("Invalid protein sequence")
-            if procedure not in d_of_functions:
-                raise ValueError("Wrong procedure name")
-            else:
-                results.append(d_of_functions[procedure](seq))
-    if len(results) == 1:
-        return results[0]
-    else:
-        return results