From d98ccaf8f9e3784252127c2639c2c843a9188dce Mon Sep 17 00:00:00 2001
From: Nikita <nikita.sapozhnikov1@gmail.com>
Date: Tue, 26 Sep 2023 07:14:08 +0300
Subject: [PATCH 01/30] Initial commit

---
 HW4_Sapozhnikov/README.md    | 19 +++++++++++++++++++
 HW4_Sapozhnikov/prototool.py | 16 ++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 HW4_Sapozhnikov/README.md
 create mode 100644 HW4_Sapozhnikov/prototool.py

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
new file mode 100644
index 0000000..8a4222d
--- /dev/null
+++ b/HW4_Sapozhnikov/README.md
@@ -0,0 +1,19 @@
+# HW 4. Functions 2
+> *This is the repo for the fourth homework of the BI Python 2023 course*
+
+### Title
+
+### Overview 
+
+### Usage
+
+### Options
+
+### Examples
+
+### Troubleshooting
+
+### Contacts
+
+Feel free to report any bugs and problems encountered.
+Email: nikita.sapozhnikov1@gmail.com
\ No newline at end of file
diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
new file mode 100644
index 0000000..98ca0a0
--- /dev/null
+++ b/HW4_Sapozhnikov/prototool.py
@@ -0,0 +1,16 @@
+def main():
+    """
+    an entry point to the tool
+
+    This tool provides the following functionality:
+    - local alignment of two sequences
+    - ...
+
+    To get started choose one of the possible programms to run:
+    1. Local alignment
+    Enter two protein sequences in 1- letter encoding. The code will return alignment scores and 
+    sequences aligned on each other. 
+    2. ...
+
+    """
+    pass
\ No newline at end of file

From 42c89b3c65607ba418caab66751abb4096efc343 Mon Sep 17 00:00:00 2001
From: Nikita <nikita.sapozhnikov1@gmail.com>
Date: Tue, 26 Sep 2023 07:46:38 +0300
Subject: [PATCH 02/30] Add local alignment function

---
 HW4_Sapozhnikov/prototool.py | 96 ++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 98ca0a0..4564f37 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -1,3 +1,99 @@
+def local_alignment(seq1: str, seq2: str, match=2, mismatch=-1, gap=-1, prettify: bool=True) -> list:
+    """
+    perform a local alignment of 2 given sequences
+
+    Args:
+    - seq1, seq2 (str) - sequences to align
+    - match, mismatch, gap (int) - alignment scoring and penalty values
+    defaulted to 2, -1, -1
+
+    Returns:
+    - a a dictionary of {'aligned_seq1':aligned_seq1,
+                        'aligned_seq2':aligned_seq2, 
+                        'alignment_score':alignment_score} 
+    """
+
+    m, n = len(seq1), len(seq2)
+    
+    # Initialize the score matrix and traceback matrix
+    score_matrix = [[0] * (n + 1) for _ in range(m + 1)]
+    traceback_matrix = [[None] * (n + 1) for _ in range(m + 1)]
+    
+    alignment_score = 0  # To keep track of the maximum score in the matrix
+    max_i, max_j = 0, 0  # To store the position of the maximum score
+
+    # Fill in the score matrix
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            if seq1[i - 1] == seq2[j - 1]:
+                match_score = score_matrix[i - 1][j - 1] + match
+            else:
+                match_score = score_matrix[i - 1][j - 1] + mismatch
+            
+            delete_score = score_matrix[i - 1][j] + gap
+            insert_score = score_matrix[i][j - 1] + gap
+            
+            # Calculate the maximum score for the current cell
+            score = max(0, match_score, delete_score, insert_score)
+            
+            # Update the score matrix and traceback matrix
+            score_matrix[i][j] = score
+            
+            if score > alignment_score:
+                alignment_score = score
+                max_i, max_j = i, j
+
+            if score == match_score:
+                traceback_matrix[i][j] = "match"
+            elif score == delete_score:
+                traceback_matrix[i][j] = "delete"
+            elif score == insert_score:
+                traceback_matrix[i][j] = "insert"
+            else:
+                traceback_matrix[i][j] = "none"
+    
+    # Traceback to find the aligned sequences
+    aligned_seq1 = []
+    aligned_seq2 = []
+    
+    i, j = max_i, max_j
+    
+    while i > 0 and j > 0:
+        if traceback_matrix[i][j] == "match":
+            aligned_seq1.append(seq1[i - 1])
+            aligned_seq2.append(seq2[j - 1])
+            i -= 1
+            j -= 1
+        elif traceback_matrix[i][j] == "delete":
+            aligned_seq1.append(seq1[i - 1])
+            aligned_seq2.append("-")
+            i -= 1
+        elif traceback_matrix[i][j] == "insert":
+            aligned_seq1.append("-")
+            aligned_seq2.append(seq2[j - 1])
+            j -= 1
+        else:
+            break
+    
+    # Reverse the aligned sequences
+    aligned_seq1 = "".join(aligned_seq1[::-1])
+    aligned_seq2 = "".join(aligned_seq2[::-1])
+
+    # Form an output dictionary
+    alignment_dict = {'aligned_seq1':aligned_seq1,
+                      'aligned_seq2':aligned_seq2,
+                      'alignment_score':alignment_score}  
+
+    # Prettify an alignment output
+    seq_on = (seq1 if seq1 <= seq2 else seq2)
+    if prettify == True:
+        prettify_alignment(seq_on, alignment_dict)  
+    else: 
+        pass
+
+    return alignment_dict
+
+
 def main():
     """
     an entry point to the tool

From 873f7d76cd0f5f3002d3c4055efe01b380b5ee49 Mon Sep 17 00:00:00 2001
From: Nikita <nikita.sapozhnikov1@gmail.com>
Date: Thu, 28 Sep 2023 13:39:11 +0300
Subject: [PATCH 03/30] Add local alignment functionality

Add check_input() to check the validity of the input in main()
Add local_alignment() to perform Smith-Waterman algorithm
Add prettify_alignment() to prettify the view of an alignment
---
 HW4_Sapozhnikov/prototool.py | 210 +++++++++++++++++++++++++++--------
 1 file changed, 165 insertions(+), 45 deletions(-)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 4564f37..799df57 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -1,44 +1,80 @@
-def local_alignment(seq1: str, seq2: str, match=2, mismatch=-1, gap=-1, prettify: bool=True) -> list:
+from typing import List, Optional, Tuple, Union
+
+
+def prettify_alignment(aligned_seq_on: str, aligned_seq2: str) -> None:
+    """
+    Prettifies alignment output by printing out two
+    sequences on top of each other
+
+    Finds the start of aligned sequence in the longer of sequences.\\
+    Prints the longer sequence as an upper one and aligned sequence
+    is below, separated via vertical lines
+
+    Args:
+    - aligned_seq_on, aligned_seq2 - sequences
+    from the local_alignment()
+
+    Returns:
+    None \\
+    Prints out the prettified view in stdout
+    """
+
+    print(aligned_seq_on)
+    print('|' * len(aligned_seq2))
+    print(aligned_seq2)
+
+
+def local_alignment(seq_on: str,
+                    seq2: Union[List[str], str],
+                    alignment_dict: dict,
+                    seq_id: int,
+                    match=2,
+                    mismatch=-1,
+                    gap=-1,
+                    prettify: bool = True) -> dict:
     """
-    perform a local alignment of 2 given sequences
+    Perform a local alignment of 2 given sequences
 
     Args:
-    - seq1, seq2 (str) - sequences to align
-    - match, mismatch, gap (int) - alignment scoring and penalty values
+    - seq_on - the sequence to align onto
+    - seq2 - sequences to align
+    - alignment_dict - a dictionary to yield alignment results
+    - match, mismatch, gap - alignment scoring and penalty values
     defaulted to 2, -1, -1
+    - prettify - if True (default) prints out the prettified version
+    of sequences aligned on top of each other
+    - seq_id - index of the sequence in the seq list
 
     Returns:
-    - a a dictionary of {'aligned_seq1':aligned_seq1,
-                        'aligned_seq2':aligned_seq2, 
-                        'alignment_score':alignment_score} 
+    - a dictionary with alignment results
     """
 
-    m, n = len(seq1), len(seq2)
-    
+    len_seq_on, len_seq2 = len(seq_on), len(seq2)
+
     # Initialize the score matrix and traceback matrix
-    score_matrix = [[0] * (n + 1) for _ in range(m + 1)]
-    traceback_matrix = [[None] * (n + 1) for _ in range(m + 1)]
-    
+    score_matrix = [[0] * (len_seq2 + 1) for _ in range(len_seq_on + 1)]
+    traceback_matrix = [[None] * (len_seq2 + 1) for _ in range(len_seq_on + 1)]
+
     alignment_score = 0  # To keep track of the maximum score in the matrix
     max_i, max_j = 0, 0  # To store the position of the maximum score
 
     # Fill in the score matrix
-    for i in range(1, m + 1):
-        for j in range(1, n + 1):
-            if seq1[i - 1] == seq2[j - 1]:
+    for i in range(1, len_seq_on + 1):
+        for j in range(1, len_seq2 + 1):
+            if seq_on[i - 1] == seq2[j - 1]:
                 match_score = score_matrix[i - 1][j - 1] + match
             else:
                 match_score = score_matrix[i - 1][j - 1] + mismatch
-            
+
             delete_score = score_matrix[i - 1][j] + gap
             insert_score = score_matrix[i][j - 1] + gap
-            
+
             # Calculate the maximum score for the current cell
             score = max(0, match_score, delete_score, insert_score)
-            
+
             # Update the score matrix and traceback matrix
             score_matrix[i][j] = score
-            
+
             if score > alignment_score:
                 alignment_score = score
                 max_i, max_j = i, j
@@ -51,62 +87,146 @@ def local_alignment(seq1: str, seq2: str, match=2, mismatch=-1, gap=-1, prettify
                 traceback_matrix[i][j] = "insert"
             else:
                 traceback_matrix[i][j] = "none"
-    
+
     # Traceback to find the aligned sequences
-    aligned_seq1 = []
+    aligned_seq_on = []
     aligned_seq2 = []
-    
+
+    counter_identity: int = 0
+    counter_gaps: int = 0
+
     i, j = max_i, max_j
-    
+
     while i > 0 and j > 0:
         if traceback_matrix[i][j] == "match":
-            aligned_seq1.append(seq1[i - 1])
+            aligned_seq_on.append(seq_on[i - 1])
             aligned_seq2.append(seq2[j - 1])
+            counter_identity += 1
             i -= 1
             j -= 1
         elif traceback_matrix[i][j] == "delete":
-            aligned_seq1.append(seq1[i - 1])
+            aligned_seq_on.append(seq_on[i - 1])
             aligned_seq2.append("-")
+            counter_gaps += 1
             i -= 1
         elif traceback_matrix[i][j] == "insert":
-            aligned_seq1.append("-")
+            aligned_seq_on.append("-")
             aligned_seq2.append(seq2[j - 1])
+            counter_gaps += 1
             j -= 1
         else:
             break
-    
+
     # Reverse the aligned sequences
-    aligned_seq1 = "".join(aligned_seq1[::-1])
+    aligned_seq_on = "".join(aligned_seq_on[::-1])
     aligned_seq2 = "".join(aligned_seq2[::-1])
 
+    alignment_length = (len(aligned_seq_on)
+                        if len(aligned_seq_on) < len(aligned_seq2)
+                        else len(aligned_seq2))
+
     # Form an output dictionary
-    alignment_dict = {'aligned_seq1':aligned_seq1,
-                      'aligned_seq2':aligned_seq2,
-                      'alignment_score':alignment_score}  
+    alignment_dict['aligned_seq_on'] = aligned_seq_on
+
+    identity = round(counter_identity/alignment_length, 4)
+
+    alignment_dict[f'aligned_seq{seq_id+1}'] = {'seq': aligned_seq2,
+                                                'length': alignment_length,
+                                                'score': alignment_score,
+                                                'identity': identity,
+                                                'gaps': counter_gaps}
 
     # Prettify an alignment output
-    seq_on = (seq1 if seq1 <= seq2 else seq2)
-    if prettify == True:
-        prettify_alignment(seq_on, alignment_dict)  
-    else: 
+    if prettify is True:
+        prettify_alignment(aligned_seq_on, aligned_seq2)
+    else:
         pass
 
     return alignment_dict
 
 
-def main():
+def check_input(*args: List[str]) -> Tuple[List[str],
+                                           str,
+                                           Optional[str]]:
+    """
+    Function to check the validity of the input.
+
+    Args:
+    *args - are supposed to be all sequences to process and the method to
+    process with.
+    The method is supposed to be the last argument.
+
+    Returns:
+    - seqs_list - list of sequences
+    - method - a chosen method to use
+    - seq_on (optional) - in case of local_alignment method
     """
-    an entry point to the tool
 
-    This tool provides the following functionality:
-    - local alignment of two sequences
-    - ...
+    if len(args) < 1:
+        # Handle the case where there are no arguments
+        raise ValueError("No input defined.")
+    else:
+        # Check the last element of the input is a valid method
+        method = args[-1]
+        if method not in ['local_alignment', '', '', '', '']:
+            raise ValueError(method, " is not a valid method.")
+        else:
+            # Form a list with sequences from the input
+            seqs_list = list(args[:-1])
+            if method == 'local_alignment':
+                seq_on = seqs_list.pop(0)
+                return seqs_list, method, seq_on
+            seq_on = None
+            return seqs_list, method, seq_on
+
 
-    To get started choose one of the possible programms to run:
-    1. Local alignment
-    Enter two protein sequences in 1- letter encoding. The code will return alignment scores and 
-    sequences aligned on each other. 
+def main(*args: Tuple[Union[List[str], str], str]) -> dict:
+    """
+    This function provides the access to the following methods:
+    1. Local Alignment of two sequences - the last argument: 'local_alignment'
+       - needs at least 2 protein sequences 1-letter encoded.
+       When more than 2 sequences are passed, uses the first
+       entered sequence to align the rest on
+       - performs an alignment using Smith-Waterman algorithm
     2. ...
+    3. ...
+    4. ...
+    5. ...
+
+    Args:
+    *args - are supposed to be all sequences to process and the method
+    to process with.
+    The method is supposed to be the last argument.
 
+    Returns:
+    function_result - result of a chosen function
     """
-    pass
\ No newline at end of file
+
+    seqs_list, method, seq_on = check_input(*args)
+    print(seqs_list, method, seq_on)
+
+    match method:
+
+        case 'local_alignment':
+
+            alignment_dict: dict = {}
+            for seq_id, seq in enumerate(seqs_list):
+                function_result = local_alignment(seq_on=seq_on,
+                                                  seq2=seq,
+                                                  alignment_dict=alignment_dict,
+                                                  seq_id=seq_id,
+                                                  prettify=True)
+
+        case '':
+
+            pass
+
+        case _:
+
+            function_result = None
+
+    return function_result
+
+
+test = main("CGTAGTCGATGCTG", "AGTCGTACAT", "ATGRC", "local_alignment")
+print(test)

From 86f281b6af543897ea9d49cd66a68b8b985e06e1 Mon Sep 17 00:00:00 2001
From: Daria <nekrasovadasha22@mail.ru>
Date: Thu, 28 Sep 2023 19:18:50 +0300
Subject: [PATCH 04/30] start development from_proteins_seqs_to_rna function

---
 .idea/.gitignore                              |  3 ++
 .idea/HW4_Sapozhnikov.iml                     |  8 +++++
 .../inspectionProfiles/profiles_settings.xml  |  6 ++++
 .idea/misc.xml                                |  4 +++
 .idea/modules.xml                             |  8 +++++
 .idea/vcs.xml                                 |  6 ++++
 HW4_Sapozhnikov/prototool.py                  | 35 ++++++++++++++++++-
 7 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/HW4_Sapozhnikov.iml
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml

diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/HW4_Sapozhnikov.iml b/.idea/HW4_Sapozhnikov.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/.idea/HW4_Sapozhnikov.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..a971a2c
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..acbf176
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/HW4_Sapozhnikov.iml" filepath="$PROJECT_DIR$/.idea/HW4_Sapozhnikov.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 98ca0a0..178b336 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -13,4 +13,37 @@ def main():
     2. ...
 
     """
-    pass
\ No newline at end of file
+    pass
+
+
+def from_proteins_seqs_to_rna(*seqs, tool='RNA'):
+    PROTEIN_TO_RNA_COMBINATION = {
+        'Ala': {'GCU', 'GCC', 'GCA', 'GCG'},
+        'Arg': {'CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'},
+        'Asn': {'AAU', 'AAC'},
+        'Asp': {'GAU', 'GAC'},
+        'Cys': {'UGU', 'UGC'},
+        'Glu': {'GAA', 'GAG'},
+        'Gln': {'CAA', 'CAG'},
+        'Gly': {'GGU', 'GGC', 'GGA', 'GGG'},
+        'His': {'CAU', 'CAC'},
+        'Ile': {'AUU', 'AUC', 'AUA'},
+        'Leu': {'CUU', 'CUC', 'CUA', 'CUG'},
+        'Lys': {'AAA', 'AAG'},
+        'Met': {'AUG'},
+        'Phe': {'UUU', 'UUC'},
+        'Pro': {'CCU', 'CCC', 'CCA', 'CCG'},
+        'Ser': {'UCU', 'UCC', 'UCA', 'UCG'},
+        'Thr': {'ACU', 'ACC', 'ACA', 'ACG'},
+        'Tyr': {'UAU', 'UAC'},
+        'Trp': {'UGG'},
+        'Val': {'GUU', 'GUC', 'GUA', 'GUG'},
+    }
+
+    if seqs[::3] in PROTEIN_TO_RNA_COMBINATION.keys():
+        if len()
+
+
+
+
+    pass

From 231efcc88a179e8baba9fb86b9fd76591bb077f2 Mon Sep 17 00:00:00 2001
From: Daria <nekrasovadasha22@mail.ru>
Date: Thu, 28 Sep 2023 21:49:50 +0300
Subject: [PATCH 05/30] add cycles converting proteins to RNA in
 from_proteins_seqs_to_rna function

---
 HW4_Sapozhnikov/for_test.py  |  6 ++++++
 HW4_Sapozhnikov/prototool.py | 30 ++++++++++++++++++++++--------
 2 files changed, 28 insertions(+), 8 deletions(-)
 create mode 100644 HW4_Sapozhnikov/for_test.py

diff --git a/HW4_Sapozhnikov/for_test.py b/HW4_Sapozhnikov/for_test.py
new file mode 100644
index 0000000..53e75a5
--- /dev/null
+++ b/HW4_Sapozhnikov/for_test.py
@@ -0,0 +1,6 @@
+my_input = 'vla', 'ValTrpPhe', 'phe', 'vla', 'ValTrpPhe', 'phe'
+
+for aminoacids in my_input:
+    devided = [aminoacids[i:i + 3] for i in range(0, len(aminoacids), 3)]
+    print(devided)
+
diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 178b336..78ddebe 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -16,7 +16,7 @@ def main():
     pass
 
 
-def from_proteins_seqs_to_rna(*seqs, tool='RNA'):
+def from_proteins_seqs_to_rna(*seqs):
     PROTEIN_TO_RNA_COMBINATION = {
         'Ala': {'GCU', 'GCC', 'GCA', 'GCG'},
         'Arg': {'CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'},
@@ -40,10 +40,24 @@ def from_proteins_seqs_to_rna(*seqs, tool='RNA'):
         'Val': {'GUU', 'GUC', 'GUA', 'GUG'},
     }
 
-    if seqs[::3] in PROTEIN_TO_RNA_COMBINATION.keys():
-        if len()
-
-
-
-
-    pass
+    for aminoacids in seqs:
+        found_sets = []
+        divided_acids = [aminoacids[i:i + 3] for i in range(0, len(aminoacids), 3)]
+        for divided_acid in divided_acids:
+            if divided_acid in PROTEIN_TO_RNA_COMBINATION.keys():
+                found_sets.append([])
+                for comb in PROTEIN_TO_RNA_COMBINATION[divided_acid]:
+                    found_sets[-1].append(comb)
+
+        for i in range(0, len(found_sets)):
+            for j in range(0, len(found_sets[i])):
+                combination = found_sets[i][j]
+                if len(found_sets) > 1:
+                    for k in range(0, len(found_sets)):
+                        if k != i:
+                            for m in range(0, len(found_sets[k])):
+                                combination += ' ' + found_sets[k][m]
+
+                    print(combination)
+
+from_proteins_seqs_to_rna('ValTyrMet')
\ No newline at end of file

From 09d4711b1bc56ce45fdee0023e0d58203ded58ae Mon Sep 17 00:00:00 2001
From: Daria <nekrasovadasha22@mail.ru>
Date: Fri, 29 Sep 2023 23:21:47 +0300
Subject: [PATCH 06/30] add from_proteins_seqs_to_rna and
 isoelectric_point_determination functions

---
 HW4_Sapozhnikov/for_test.py  |  6 ---
 HW4_Sapozhnikov/prototool.py | 87 +++++++++++++++++++++++++++---------
 2 files changed, 67 insertions(+), 26 deletions(-)
 delete mode 100644 HW4_Sapozhnikov/for_test.py

diff --git a/HW4_Sapozhnikov/for_test.py b/HW4_Sapozhnikov/for_test.py
deleted file mode 100644
index 53e75a5..0000000
--- a/HW4_Sapozhnikov/for_test.py
+++ /dev/null
@@ -1,6 +0,0 @@
-my_input = 'vla', 'ValTrpPhe', 'phe', 'vla', 'ValTrpPhe', 'phe'
-
-for aminoacids in my_input:
-    devided = [aminoacids[i:i + 3] for i in range(0, len(aminoacids), 3)]
-    print(devided)
-
diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 78ddebe..42bafdd 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -10,13 +10,19 @@ def main():
     1. Local alignment
     Enter two protein sequences in 1- letter encoding. The code will return alignment scores and 
     sequences aligned on each other. 
-    2. ...
+    2. Call method
 
     """
     pass
 
 
-def from_proteins_seqs_to_rna(*seqs):
+def from_proteins_seqs_to_rna(*seqs: str) -> dict:
+    """
+    :param seqs: strings with type 'ValTyrAla','AsnAspCys'. seqs is args parameter, so
+    you can pass more than one sequences at the time.
+    :return: dictionary, when [key] is your input protein sequences
+    and values are combinations of RNA codones, which encode proteins
+    """
     PROTEIN_TO_RNA_COMBINATION = {
         'Ala': {'GCU', 'GCC', 'GCA', 'GCG'},
         'Arg': {'CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'},
@@ -39,25 +45,66 @@ def from_proteins_seqs_to_rna(*seqs):
         'Trp': {'UGG'},
         'Val': {'GUU', 'GUC', 'GUA', 'GUG'},
     }
-
+    answer_dict = {}
     for aminoacids in seqs:
-        found_sets = []
+        rna_combination = ''
         divided_acids = [aminoacids[i:i + 3] for i in range(0, len(aminoacids), 3)]
         for divided_acid in divided_acids:
             if divided_acid in PROTEIN_TO_RNA_COMBINATION.keys():
-                found_sets.append([])
-                for comb in PROTEIN_TO_RNA_COMBINATION[divided_acid]:
-                    found_sets[-1].append(comb)
-
-        for i in range(0, len(found_sets)):
-            for j in range(0, len(found_sets[i])):
-                combination = found_sets[i][j]
-                if len(found_sets) > 1:
-                    for k in range(0, len(found_sets)):
-                        if k != i:
-                            for m in range(0, len(found_sets[k])):
-                                combination += ' ' + found_sets[k][m]
-
-                    print(combination)
-
-from_proteins_seqs_to_rna('ValTyrMet')
\ No newline at end of file
+                rna_combination += next(iter(PROTEIN_TO_RNA_COMBINATION[divided_acid]))
+        answer_dict[aminoacids] = rna_combination
+    return answer_dict
+
+def isoelectric_point_determination(*seqs: str) -> dict:
+    """
+    :param seqs: strings of the form 'ValTyrAla', 'AsnAspCys'. seqs is a variadic (*args) parameter, so
+    you can pass more than one sequence at a time.
+    :return: dictionary, where each key is an input protein sequence and the value is the isoelectric point
+    of that protein
+    """
+    PKA_AMINOACIDS = {
+        'Ala': [2.34, 9.69],
+        'Arg': [2.17, 9.04, 12.68],
+        'Asn': [1.88, 9.60, 3.65],
+        'Asp': [1.88, 9.60, 3.65],
+        'Cys': [1.96, 10.28, 8.18],
+        'Glu': [2.19, 9.67, 4.25],
+        'Gln': [2.17, 9.13],
+        'Gly': [2.34, 9.60],
+        'His': [1.82, 9.17],
+        'Ile': [2.36, 9.68],
+        'Leu': [2.36, 9.60],
+        'Lys': [2.18, 8.95, 10.53],
+        'Met': [2.28, 9.21],
+        'Phe': [2.20, 9.13],
+        'Pro': [1.99, 10.96],
+        'Ser': [2.21, 9.15],
+        'Thr': [2.11, 9.62],
+        'Tyr': [2.20, 9.11, 10.07],
+        'Trp': [2.38, 9.39],
+        'Val': [2.32, 9.62],
+    }
+
+    answer_dictionary = {}
+
+    for aminoacids in seqs:
+        divided_acids = [aminoacids[i:i + 3] for i in range(0, len(aminoacids), 3)]
+        for divided_acid in divided_acids:
+            if not divided_acid in PKA_AMINOACIDS.keys():
+                raise ValueError('Non-protein aminoacids in sequence')
+
+        isoelectric_point_mean = 0
+        count_groups = 0
+        for acid_index in range(0, len(divided_acids)):
+            if acid_index == 0:
+                isoelectric_point_mean += PKA_AMINOACIDS[divided_acids[acid_index]][0]
+                count_groups += 1
+            elif acid_index == len(divided_acids) - 1:
+                isoelectric_point_mean = isoelectric_point_mean + PKA_AMINOACIDS[divided_acids[acid_index]][-1]
+                count_groups += 1
+            else:
+                if len(PKA_AMINOACIDS[divided_acids[acid_index]]) > 2:
+                    isoelectric_point_mean = isoelectric_point_mean + PKA_AMINOACIDS[divided_acids[acid_index]][1]
+                    count_groups += 1
+        answer_dictionary[aminoacids] = isoelectric_point_mean / count_groups
+    return answer_dictionary

From 1e98426389717a2b368efc9fcf6ceddd77c446a2 Mon Sep 17 00:00:00 2001
From: Daria <nekrasovadasha22@mail.ru>
Date: Fri, 29 Sep 2023 23:55:35 +0300
Subject: [PATCH 07/30] add gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1c2d52b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.idea/*

From d6f1bfdba00bba4609264374fed5cbd6e9e87b80 Mon Sep 17 00:00:00 2001
From: Daria <nekrasovadasha22@mail.ru>
Date: Fri, 29 Sep 2023 23:57:56 +0300
Subject: [PATCH 08/30] remove excess files

---
 .idea/.gitignore                               | 3 ---
 .idea/HW4_Sapozhnikov.iml                      | 8 --------
 .idea/inspectionProfiles/profiles_settings.xml | 6 ------
 .idea/misc.xml                                 | 4 ----
 .idea/modules.xml                              | 8 --------
 .idea/vcs.xml                                  | 6 ------
 6 files changed, 35 deletions(-)
 delete mode 100644 .idea/.gitignore
 delete mode 100644 .idea/HW4_Sapozhnikov.iml
 delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 delete mode 100644 .idea/misc.xml
 delete mode 100644 .idea/modules.xml
 delete mode 100644 .idea/vcs.xml

diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index 26d3352..0000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
diff --git a/.idea/HW4_Sapozhnikov.iml b/.idea/HW4_Sapozhnikov.iml
deleted file mode 100644
index d0876a7..0000000
--- a/.idea/HW4_Sapozhnikov.iml
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-</module>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2d..0000000
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <settings>
-    <option name="USE_PROJECT_PROFILE" value="false" />
-    <version value="1.0" />
-  </settings>
-</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index a971a2c..0000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11" project-jdk-type="Python SDK" />
-</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index acbf176..0000000
--- a/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/HW4_Sapozhnikov.iml" filepath="$PROJECT_DIR$/.idea/HW4_Sapozhnikov.iml" />
-    </modules>
-  </component>
-</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 35eb1dd..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="" vcs="Git" />
-  </component>
-</project>
\ No newline at end of file

From f5e4308b0683e0121475c695f9aad59bde7c614f Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Fri, 29 Sep 2023 21:36:40 +0000
Subject: [PATCH 09/30] Minor code revision

---
 HW4_Sapozhnikov/prototool.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 42bafdd..00d0d6d 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -20,8 +20,8 @@ def from_proteins_seqs_to_rna(*seqs: str) -> dict:
     """
     :param seqs: strings with type 'ValTyrAla','AsnAspCys'. seqs is args parameter, so
     you can pass more than one sequences at the time.
-    :return: dictionary, when [key] is your input protein sequences
-    and values are combinations of RNA codones, which encode proteins
+    :return: dictionary, where each key is an input protein sequence
+    and each value is a combination of RNA codons that encodes this protein
     """
     PROTEIN_TO_RNA_COMBINATION = {
         'Ala': {'GCU', 'GCC', 'GCA', 'GCG'},
@@ -45,15 +45,15 @@ def from_proteins_seqs_to_rna(*seqs: str) -> dict:
         'Trp': {'UGG'},
         'Val': {'GUU', 'GUC', 'GUA', 'GUG'},
     }
-    answer_dict = {}
+    answer_dictionary = {}
     for aminoacids in seqs:
         rna_combination = ''
         divided_acids = [aminoacids[i:i + 3] for i in range(0, len(aminoacids), 3)]
         for divided_acid in divided_acids:
             if divided_acid in PROTEIN_TO_RNA_COMBINATION.keys():
                 rna_combination += next(iter(PROTEIN_TO_RNA_COMBINATION[divided_acid]))
-        answer_dict[aminoacids] = rna_combination
-    return answer_dict
+        answer_dictionary[aminoacids] = rna_combination
+    return answer_dictionary
 
 def isoelectric_point_determination(*seqs: str) -> dict:
     """

From c318a3ac20a4fd592fb67d9f95424a13d192c1f4 Mon Sep 17 00:00:00 2001
From: Nikita <nikita.sapozhnikov1@gmail.com>
Date: Sat, 30 Sep 2023 14:31:35 +0300
Subject: [PATCH 10/30] Add recode() function

---
 HW4_Sapozhnikov/prototool.py | 191 +++++++++++++++++++++++------------
 1 file changed, 129 insertions(+), 62 deletions(-)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 4bb7547..1a74584 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -1,6 +1,57 @@
+"""
+This is a prototool.
+"""
+
 from typing import List, Optional, Tuple, Union
 
 
+def recode(*seq: Union[List[str], str]) -> dict:
+    """
+    Translate 1-letter to 3-letter encoding if 1-letter
+    encoded sequence is given and vice versa.
+
+    Args:
+    - seq - sequence or list of sequences to recode
+
+    Returns:
+    - function_result - a dictionary containing recoded sequences as values
+    for original sequences keys
+    """
+
+    to_1_dictionary = {
+        'Ala': 'A', 'Arg': 'R', 'Asn': 'N', 'Asp': 'D',
+        'Cys': 'C', 'Gln': 'Q', 'Glu': 'E', 'Gly': 'G',
+        'His': 'H', 'Ile': 'I', 'Leu': 'L', 'Lys': 'K',
+        'Met': 'M', 'Phe': 'F', 'Pro': 'P', 'Ser': 'S',
+        'Thr': 'T', 'Trp': 'W', 'Tyr': 'Y', 'Val': 'V'
+}
+
+    to_3_dictionary = {v: k for k, v in to_1_dictionary.items()}
+
+    function_result = {}
+
+    for sequence in seq:
+        # Check if the input sequence is in 1-letter or 3-letter format
+        is_one_letter = all(aa.isalpha() and aa.isupper() for aa in sequence)
+
+        if is_one_letter:
+            # Translate 1-letter to 3-letter coded sequence
+            three_letter_sequence = ""
+            for aa in sequence:
+                three_letter_code = to_3_dictionary.get(aa, aa)
+                three_letter_sequence += three_letter_code
+            function_result[sequence] = three_letter_sequence
+        else:
+            # Translate 3-letter to 1-letter coded sequence
+            one_letter_sequence = ""
+            for aa in range(0, len(sequence), 3):
+                amino_acid = sequence[aa:aa+3]
+                one_letter_sequence += to_1_dictionary.get(amino_acid,
+                                                           amino_acid)
+            function_result[sequence] = one_letter_sequence
+    return function_result
+
+
 def prettify_alignment(aligned_seq_on: str, aligned_seq2: str) -> None:
     """
     Prettifies alignment output by printing out two
@@ -180,64 +231,11 @@ def check_input(*args: List[str]) -> Tuple[List[str],
             return seqs_list, method, seq_on
 
 
-def main(*args: Tuple[Union[List[str], str], str]) -> dict:
-    """
-    This function provides the access to the following methods:
-    1. Local Alignment of two sequences - the last argument: 'local_alignment'
-       - needs at least 2 protein sequences 1-letter encoded.
-       When more than 2 sequences are passed, uses the first
-       entered sequence to align the rest on
-       - performs an alignment using Smith-Waterman algorithm
-    2. ...
-    3. ...
-    4. ...
-    5. ...
-
-    Args:
-    *args - are supposed to be all sequences to process and the method
-    to process with.
-    The method is supposed to be the last argument.
-=======
-    To get started choose one of the possible programms to run:
-    1. Local alignment
-    Enter two protein sequences in 1- letter encoding. The code will return alignment scores and 
-    sequences aligned on each other. 
-    2. Call method
-
-    Returns:
-    function_result - result of a chosen function
-    """
-
-    seqs_list, method, seq_on = check_input(*args)
-    print(seqs_list, method, seq_on)
-
-    match method:
-
-        case 'local_alignment':
-
-            alignment_dict: dict = {}
-            for seq_id, seq in enumerate(seqs_list):
-                function_result = local_alignment(seq_on=seq_on,
-                                                  seq2=seq,
-                                                  alignment_dict=alignment_dict,
-                                                  seq_id=seq_id,
-                                                  prettify=True)
-
-        case '':
-
-            pass
-
-        case _:
-
-            function_result = None
-
-    return function_result
-
-
 def from_proteins_seqs_to_rna(*seqs: str) -> dict:
     """
-    :param seqs: strings with type 'ValTyrAla','AsnAspCys'. seqs is args parameter, so
-    you can pass more than one sequences at the time.
+    :param seqs: strings with type 'ValTyrAla','AsnAspCys'.
+    seqs is args parameter, so you can pass more than one
+    sequences at the time.
     :return: dictionary, where [key] is your input protein sequences
     and values are combinations of RNA codones, which encode this protein
     """
@@ -276,10 +274,11 @@ def from_proteins_seqs_to_rna(*seqs: str) -> dict:
  
 def isoelectric_point_determination(*seqs: str) -> dict:
     """
-    :param seqs: strings with type 'ValTyrAla','AsnAspCys'. seqs is args parameter, so
-    you can pass more than one sequences at the time.
-    :return: dictionary, when [key] is your input protein sequence and value is an isoelectric point
-    of your input proteins
+    :param seqs: strings with type 'ValTyrAla','AsnAspCys'.
+    seqs is args parameter, so you can pass more than one
+    sequences at a time.
+    :return: dictionary, where [key] is your input protein sequence and value
+    is an isoelectric point of your input proteins
     """
     PKA_AMINOACIDS = {
         'Ala': [2.34, 9.69],
@@ -309,7 +308,7 @@ def isoelectric_point_determination(*seqs: str) -> dict:
     for aminoacids in seqs:
         divided_acids = [aminoacids[i:i + 3] for i in range(0, len(aminoacids), 3)]
         for divided_acid in divided_acids:
-            if not divided_acid in PKA_AMINOACIDS.keys():
+            if divided_acid not in PKA_AMINOACIDS.keys():
                 raise ValueError('Non-protein aminoacids in sequence')
 
         isoelectric_point_mean = 0
@@ -326,4 +325,72 @@ def isoelectric_point_determination(*seqs: str) -> dict:
                     isoelectric_point_mean = isoelectric_point_mean + PKA_AMINOACIDS[divided_acids[acid_index]][1]
                     count_groups += 1
         answer_dictionary[aminoacids] = isoelectric_point_mean / count_groups
-    return answer_dictionary
\ No newline at end of file
+    return answer_dictionary
+
+
+def main(*args: Tuple[Union[List[str], str], str]) -> dict:
+    """
+    This function provides the access to the following methods:
+
+    1. Translate 1 letter to 3 letter encoding and vice versa - the last
+    argument: 'recode'
+        - needs at least 1 sequence 1- or 3- letter encoded. Can recive
+        more than 1 sequences
+        - returns a dictionary containing translations between 1- and 3-
+        letter codes
+
+    2. Local Alignment of two sequences - the last argument: 'local_alignment'
+       - needs at least 2 protein sequences 1-letter encoded.
+       When more than 2 sequences are passed, uses the first
+       entered sequence to align the rest on
+       - performs an alignment using Smith-Waterman algorithm
+
+    3. Find all possible RNA sequences for defined protein sequence - the
+    last argument: from_proteins_seqs_to_rna
+        - needs at least 1 protein sequence 3-letter encoded
+        - returns a dictionary, where key is your input protein sequences
+        and values are combinations of RNA codones, which encode this protein
+
+    4. Determinate isoelectric point - the last argument:
+    'isoelectric_point_determination'
+        - needs an input containing at least 1 aminoacid. Can recive multiple
+        different protein sequences
+        - returns a dictionary, where key is your input protein sequence and
+        value is an isoelectric point of this protein
+
+    4. ...
+    5. ...
+
+    Args:
+    *args - are supposed to be all sequences to process and the method
+    to process with.
+    The method is supposed to be the last argument.
+
+    Returns:
+    function_result - result of a chosen function
+    """
+
+    seqs_list, method, seq_on = check_input(*args)
+    print(seqs_list, method, seq_on)
+
+    match method:
+
+        case 'local_alignment':
+
+            alignment_dict: dict = {}
+            for seq_id, seq in enumerate(seqs_list):
+                function_result = local_alignment(seq_on=seq_on,
+                                                  seq2=seq,
+                                                  alignment_dict=alignment_dict,
+                                                  seq_id=seq_id,
+                                                  prettify=True)
+
+        case '':
+
+            pass
+
+        case _:
+
+            function_result = None
+
+    return function_result

From 96e209da79167c13122aa33715976f8200d53bc2 Mon Sep 17 00:00:00 2001
From: Daria <nekrasovadasha22@mail.ru>
Date: Sat, 30 Sep 2023 17:28:36 +0300
Subject: [PATCH 11/30] add raise ValueError in from_proteins_seqs_to_rna
 function, add line breaks

---
 HW4_Sapozhnikov/prototool.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 4bb7547..5873d1a 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -270,6 +270,8 @@ def from_proteins_seqs_to_rna(*seqs: str) -> dict:
         for divided_acid in divided_acids:
             if divided_acid in PROTEIN_TO_RNA_COMBINATION.keys():
                 rna_combination += next(iter(PROTEIN_TO_RNA_COMBINATION[divided_acid]))
+            else:
+                raise ValueError('Non-protein aminoacids in sequence')
         answer_dictionary[aminoacids] = rna_combination
     return answer_dictionary
 
@@ -316,14 +318,17 @@ def isoelectric_point_determination(*seqs: str) -> dict:
         count_groups = 0
         for acid_index in range(0, len(divided_acids)):
             if acid_index == 0:
-                isoelectric_point_mean += PKA_AMINOACIDS[divided_acids[acid_index]][0]
+                isoelectric_point_mean\
+                    += PKA_AMINOACIDS[divided_acids[acid_index]][0]
                 count_groups += 1
             elif acid_index == len(divided_acids) - 1:
-                isoelectric_point_mean = isoelectric_point_mean + PKA_AMINOACIDS[divided_acids[acid_index]][-1]
+                isoelectric_point_mean = (isoelectric_point_mean
+                                          + PKA_AMINOACIDS[divided_acids[acid_index]][-1])
                 count_groups += 1
             else:
                 if len(PKA_AMINOACIDS[divided_acids[acid_index]]) > 2:
-                    isoelectric_point_mean = isoelectric_point_mean + PKA_AMINOACIDS[divided_acids[acid_index]][1]
+                    isoelectric_point_mean = (isoelectric_point_mean
+                                              + PKA_AMINOACIDS[divided_acids[acid_index]][1])
                     count_groups += 1
         answer_dictionary[aminoacids] = isoelectric_point_mean / count_groups
-    return answer_dictionary
\ No newline at end of file
+    return answer_dictionary

From 703249c228a75c50c689e43877fbf46caf01a5b5 Mon Sep 17 00:00:00 2001
From: Alina <alina.potyseva@yandex.ru>
Date: Sat, 30 Sep 2023 18:49:54 +0300
Subject: [PATCH 12/30] Add back_transcribe function

---
 HW4_Sapozhnikov/prototool.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 5873d1a..9b603d2 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -332,3 +332,21 @@ def isoelectric_point_determination(*seqs: str) -> dict:
                     count_groups += 1
         answer_dictionary[aminoacids] = isoelectric_point_mean / count_groups
     return answer_dictionary
+    
+TRANSCRIBE_DICT = dict(A='A', U='T', G='G', C='C', a='a', u='t', g='g', c='c')
+
+def back_transcribe(*seqs: str) -> dict:
+    """
+    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
+    You can put as many arguments as you wish.
+    :return: THis function returns a dictonary, which [key] is inputed protein
+    sequence and values are DNA codons
+    """
+    result = {}
+    for seq in seqs:
+        rna = list((from_proteins_seqs_to_rna(seq)).get(seq))
+        for i in range(len(rna)):
+            if rna[i] in TRANSCRIBE_DICT.keys():
+                rna[i] = TRANSCRIBE_DICT[rna[i]]
+        result[seq] = "".join(rna)
+    return result

From 1ba062bc51518af6b70f56add5d65d3097b4483d Mon Sep 17 00:00:00 2001
From: Alina <alina.potyseva@yandex.ru>
Date: Sat, 30 Sep 2023 18:57:49 +0300
Subject: [PATCH 13/30] Add count_gc_content function

---
 HW4_Sapozhnikov/prototool.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 9b603d2..65b20ec 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -350,3 +350,16 @@ def back_transcribe(*seqs: str) -> dict:
                 rna[i] = TRANSCRIBE_DICT[rna[i]]
         result[seq] = "".join(rna)
     return result
+    
+def count_gc_content(*seqs: str) -> dict:
+    '''
+    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
+    You can put as many arguments as you wish.
+    :return: THis function returns GC-content of DNA sequence, which encodes the protein
+    '''
+    result = {}
+    for seq in seqs:
+        dna = list((back_transcribe(seq)).get(seq))
+        gc_content = round(100 * (dna.count('G') + dna.count('C'))/len(dna))
+        result[seq] = gc_content
+    return result

From b21741c33e1c1b080070711e4d52bde7c0cc8f26 Mon Sep 17 00:00:00 2001
From: Alina <alina.potyseva@yandex.ru>
Date: Sat, 30 Sep 2023 19:03:57 +0300
Subject: [PATCH 14/30] Add count_protein_molecular_weight function

---
 HW4_Sapozhnikov/prototool.py | 40 ++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 65b20ec..f6045dd 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -363,3 +363,43 @@ def count_gc_content(*seqs: str) -> dict:
         gc_content = round(100 * (dna.count('G') + dna.count('C'))/len(dna))
         result[seq] = gc_content
     return result
+    
+MOLECULAR_WEIGHTS = {
+    'Ala': 89,
+    'Cys': 121,
+    'Asp': 133,
+    'Glu': 147,
+    'Phe': 165,
+    'Gly': 75,
+    'His': 155,
+    'Ile': 131,
+    'Lys': 146,
+    'Leu': 131,
+    'Met': 149,
+    'Asn': 132,
+    'Pro': 115,
+    'Gln': 146,
+    'Arg': 174,
+    'Ser': 105,
+    'Thr': 119,
+    'Val': 117,
+    'Trp': 204,
+    'Tyr': 181}
+
+def count_protein_molecular_weight(*seqs: str) -> dict:
+    """
+    :param seqs: Seqs is an argument of the function. It is a string without whitespace
+    (f.g. 'AlaSer'). You can put as many arguments as you wish.
+    :return: This function returns molecular weight of the protein.
+    """
+    result = {}
+    for seq in seqs:
+        protein_weight = 0
+        aminoacids = [seq[i:i + 3] for i in range(0, len(seq), 3)]
+        for i in range(len(aminoacids)):
+            if aminoacids[i] in MOLECULAR_WEIGHTS.keys():
+                aminoacid_weight = MOLECULAR_WEIGHTS[aminoacids[i]]
+                protein_weight += aminoacid_weight
+                result[seq] = protein_weight
+    return result
+

From 463dbf3c7e5a2c8b0500250d93e1d004d33cc508 Mon Sep 17 00:00:00 2001
From: Nikita <nikita.sapozhnikov1@gmail.com>
Date: Sat, 30 Sep 2023 19:50:35 +0300
Subject: [PATCH 15/30] Refactor recode() to take one sequence; wire it into main()

---
 HW4_Sapozhnikov/prototool.py | 68 +++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 1a74584..6a364f9 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -5,7 +5,7 @@
 from typing import List, Optional, Tuple, Union
 
 
-def recode(*seq: Union[List[str], str]) -> dict:
+def recode(seq: str) -> dict:
     """
     Translate 1-letter to 3-letter encoding if 1-letter
     encoded sequence is given and vice versa.
@@ -18,38 +18,33 @@ def recode(*seq: Union[List[str], str]) -> dict:
     for original sequences keys
     """
 
-    to_1_dictionary = {
+    TO_1_dict = {
         'Ala': 'A', 'Arg': 'R', 'Asn': 'N', 'Asp': 'D',
         'Cys': 'C', 'Gln': 'Q', 'Glu': 'E', 'Gly': 'G',
         'His': 'H', 'Ile': 'I', 'Leu': 'L', 'Lys': 'K',
         'Met': 'M', 'Phe': 'F', 'Pro': 'P', 'Ser': 'S',
         'Thr': 'T', 'Trp': 'W', 'Tyr': 'Y', 'Val': 'V'
-}
-
-    to_3_dictionary = {v: k for k, v in to_1_dictionary.items()}
+    }
 
-    function_result = {}
+    TO_3_dict = {v: k for k, v in TO_1_dict.items()}
 
-    for sequence in seq:
-        # Check if the input sequence is in 1-letter or 3-letter format
-        is_one_letter = all(aa.isalpha() and aa.isupper() for aa in sequence)
+    # Check if the input sequence is in 1-letter or 3-letter format
+    is_one_letter = all(aa.isalpha() and aa.isupper() for aa in seq)
 
-        if is_one_letter:
-            # Translate 1-letter to 3-letter coded sequence
-            three_letter_sequence = ""
-            for aa in sequence:
-                three_letter_code = to_3_dictionary.get(aa, aa)
-                three_letter_sequence += three_letter_code
-            function_result[sequence] = three_letter_sequence
-        else:
-            # Translate 3-letter to 1-letter coded sequence
-            one_letter_sequence = ""
-            for aa in range(0, len(sequence), 3):
-                amino_acid = sequence[aa:aa+3]
-                one_letter_sequence += to_1_dictionary.get(amino_acid,
-                                                           amino_acid)
-            function_result[sequence] = one_letter_sequence
-    return function_result
+    if is_one_letter:
+        # Translate 1-letter to 3-letter coded sequence
+        three_letter_sequence = ""
+        for aa in seq:
+            three_letter_code = TO_3_dict.get(aa, aa)
+            three_letter_sequence += three_letter_code
+        return three_letter_sequence
+    # Translate 3-letter to 1-letter coded sequence
+    one_letter_sequence = ""
+    for aa in range(0, len(seq), 3):
+        amino_acid = seq[aa:aa+3]
+        one_letter_sequence += TO_1_dict.get(amino_acid,
+                                             amino_acid)
+    return one_letter_sequence
 
 
 def prettify_alignment(aligned_seq_on: str, aligned_seq2: str) -> None:
@@ -219,7 +214,11 @@ def check_input(*args: List[str]) -> Tuple[List[str],
     else:
         # Check the last element of the input is a valid method
         method = args[-1]
-        if method not in ['local_alignment', '', '', '', '']:
+        if method not in ['recode',
+                          'local_alignment',
+                          'from_proteins_seqs_to_rna',
+                          'isoelectric_point_determination',
+                          '']:
             raise ValueError(method, " is not a valid method.")
         else:
             # Form a list with sequences from the input
@@ -264,14 +263,16 @@ def from_proteins_seqs_to_rna(*seqs: str) -> dict:
     answer_dictionary = {}
     for aminoacids in seqs:
         rna_combination = ''
-        divided_acids = [aminoacids[i:i + 3] for i in range(0, len(aminoacids), 3)]
+        divided_acids = [aminoacids[i:i + 3] for i in range(0,
+                                                            len(aminoacids),
+                                                            3)]
         for divided_acid in divided_acids:
             if divided_acid in PROTEIN_TO_RNA_COMBINATION.keys():
                 rna_combination += next(iter(PROTEIN_TO_RNA_COMBINATION[divided_acid]))
         answer_dictionary[aminoacids] = rna_combination
     return answer_dictionary
 
- 
+
 def isoelectric_point_determination(*seqs: str) -> dict:
     """
     :param seqs: strings with type 'ValTyrAla','AsnAspCys'.
@@ -371,12 +372,21 @@ def main(*args: Tuple[Union[List[str], str], str]) -> dict:
     """
 
     seqs_list, method, seq_on = check_input(*args)
-    print(seqs_list, method, seq_on)
+    print(f'Your sequences are: {seqs_list}',
+          f'The method is: {method}', sep='\n')
 
     match method:
 
+        case 'recode':
+
+            recode_dict: dict = {}
+            for seq in seqs_list:
+                recode_dict[seq] = recode(seq=seq)
+            return recode_dict
+
         case 'local_alignment':
 
+            print('The sequence align on: ', seq_on)
             alignment_dict: dict = {}
             for seq_id, seq in enumerate(seqs_list):
                 function_result = local_alignment(seq_on=seq_on,

From a29692d281cc6ab1880d0782930ab86667454cb8 Mon Sep 17 00:00:00 2001
From: Alina <alina.potyseva@yandex.ru>
Date: Sat, 30 Sep 2023 20:09:24 +0300
Subject: [PATCH 16/30] changed order of functions

---
 HW4_Sapozhnikov/prototool.py | 139 +++++++++++++++++------------------
 1 file changed, 69 insertions(+), 70 deletions(-)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index f6045dd..bbc4294 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -179,6 +179,75 @@ def check_input(*args: List[str]) -> Tuple[List[str],
             seq_on = None
             return seqs_list, method, seq_on
 
+TRANSCRIBE_DICT = dict(A='A', U='T', G='G', C='C', a='a', u='t', g='g', c='c')
+
+def back_transcribe(*seqs: str) -> dict:
+    """
+    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
+    You can put as many arguments as you wish.
+    :return: THis function returns a dictonary, which [key] is inputed protein
+    sequence and values are DNA codons
+    """
+    result = {}
+    for seq in seqs:
+        rna = list((from_proteins_seqs_to_rna(seq)).get(seq))
+        for i in range(len(rna)):
+            if rna[i] in TRANSCRIBE_DICT.keys():
+                rna[i] = TRANSCRIBE_DICT[rna[i]]
+        result[seq] = "".join(rna)
+    return result
+    
+def count_gc_content(*seqs: str) -> dict:
+    '''
+    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
+    You can put as many arguments as you wish.
+    :return: THis function returns GC-content of DNA sequence, which encodes the protein
+    '''
+    result = {}
+    for seq in seqs:
+        dna = list((back_transcribe(seq)).get(seq))
+        gc_content = round(100 * (dna.count('G') + dna.count('C'))/len(dna))
+        result[seq] = gc_content
+    return result
+    
+MOLECULAR_WEIGHTS = {
+    'Ala': 89,
+    'Cys': 121,
+    'Asp': 133,
+    'Glu': 147,
+    'Phe': 165,
+    'Gly': 75,
+    'His': 155,
+    'Ile': 131,
+    'Lys': 146,
+    'Leu': 131,
+    'Met': 149,
+    'Asn': 132,
+    'Pro': 115,
+    'Gln': 146,
+    'Arg': 174,
+    'Ser': 105,
+    'Thr': 119,
+    'Val': 117,
+    'Trp': 204,
+    'Tyr': 181}
+
+def count_protein_molecular_weight(*seqs: str) -> dict:
+    """
+    :param seqs: Seqs is an argument of the function. It is a string without whitespace
+    (f.g. 'AlaSer'). You can put as many arguments as you wish.
+    :return: This function returns molecular weight of the protein.
+    """
+    result = {}
+    for seq in seqs:
+        protein_weight = 0
+        aminoacids = [seq[i:i + 3] for i in range(0, len(seq), 3)]
+        for i in range(len(aminoacids)):
+            if aminoacids[i] in MOLECULAR_WEIGHTS.keys():
+                aminoacid_weight = MOLECULAR_WEIGHTS[aminoacids[i]]
+                protein_weight += aminoacid_weight
+                result[seq] = protein_weight
+    return result
 
 def main(*args: Tuple[Union[List[str], str], str]) -> dict:
     """
@@ -332,74 +401,4 @@ def isoelectric_point_determination(*seqs: str) -> dict:
                     count_groups += 1
         answer_dictionary[aminoacids] = isoelectric_point_mean / count_groups
     return answer_dictionary
-    
-TRANSCRIBE_DICT = dict(A='A', U='T', G='G', C='C', a='a', u='t', g='g', c='c')
-
-def back_transcribe(*seqs: str) -> dict:
-    """
-    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
-    You can put as many arguments as you wish.
-    :return: THis function returns a dictonary, which [key] is inputed protein
-    sequence and values are DNA codons
-    """
-    result = {}
-    for seq in seqs:
-        rna = list((from_proteins_seqs_to_rna(seq)).get(seq))
-        for i in range(len(rna)):
-            if rna[i] in TRANSCRIBE_DICT.keys():
-                rna[i] = TRANSCRIBE_DICT[rna[i]]
-        result[seq] = "".join(rna)
-    return result
-    
-def count_gc_content(*seqs: str) -> dict:
-    '''
-    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
-    You can put as many arguments as you wish.
-    :return: THis function returns GC-content of DNA sequence, which encodes the protein
-    '''
-    result = {}
-    for seq in seqs:
-        dna = list((back_transcribe(seq)).get(seq))
-        gc_content = round(100 * (dna.count('G') + dna.count('C'))/len(dna))
-        result[seq] = gc_content
-    return result
-    
-MOLECULAR_WEIGHTS = {
-    'Ala': 89,
-    'Cys': 121,
-    'Asp': 133,
-    'Glu': 147,
-    'Phe': 165,
-    'Gly': 75,
-    'His': 155,
-    'Ile': 131,
-    'Lys': 146,
-    'Leu': 131,
-    'Met': 149,
-    'Asn': 132,
-    'Pro': 115,
-    'Gln': 146,
-    'Arg': 174,
-    'Ser': 105,
-    'Thr': 119,
-    'Val': 117,
-    'Trp': 204,
-    'Tyr': 181}
-
-def count_protein_molecular_weight(*seqs: str) -> dict:
-    """
-    :param seqs: Seqs is an argument of the function. It is a string without whitespace
-    (f.g. 'AlaSer'). You can put as many arguments as you wish.
-    :return: This function returns molecular weight of the protein.
-    """
-    result = {}
-    for seq in seqs:
-        protein_weight = 0
-        aminoacids = [seq[i:i + 3] for i in range(0, len(seq), 3)]
-        for i in range(len(aminoacids)):
-            if aminoacids[i] in MOLECULAR_WEIGHTS.keys():
-                aminoacid_weight = MOLECULAR_WEIGHTS[aminoacids[i]]
-                protein_weight += aminoacid_weight
-                result[seq] = protein_weight
-    return result
 

From 6ce8cf8f601ba9291fe10cad2a1b720c9003b3df Mon Sep 17 00:00:00 2001
From: Alina <alina.potyseva@gmail.com>
Date: Sat, 30 Sep 2023 20:31:54 +0300
Subject: [PATCH 17/30] Changed order of functions

---
 HW4_Sapozhnikov/prototool.py | 62 ++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index bbc4294..3f42045 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -179,37 +179,6 @@ def check_input(*args: List[str]) -> Tuple[List[str],
             seq_on = None
             return seqs_list, method, seq_on
 
-TRANSCRIBE_DICT = dict(A='A', U='T', G='G', C='C', a='a', u='t', g='g', c='c')
-
-def back_transcribe(*seqs: str) -> dict:
-    """
-    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
-    You can put as many arguments as you wish.
-    :return: THis function returns a dictonary, which [key] is inputed protein
-    sequence and values are DNA codons
-    """
-    result = {}
-    for seq in seqs:
-        rna = list((from_proteins_seqs_to_rna(seq)).get(seq))
-        for i in range(len(rna)):
-            if rna[i] in TRANSCRIBE_DICT.keys():
-                rna[i] = TRANSCRIBE_DICT[rna[i]]
-        result[seq] = "".join(rna)
-    return result
-    
-def count_gc_content(*seqs: str) -> dict:
-    '''
-    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
-    You can put as many arguments as you wish.
-    :return: THis function returns GC-content of DNA sequence, which encodes the protein
-    '''
-    result = {}
-    for seq in seqs:
-        dna = list((back_transcribe(seq)).get(seq))
-        gc_content = round(100 * (dna.count('G') + dna.count('C'))/len(dna))
-        result[seq] = gc_content
-    return result
-    
 MOLECULAR_WEIGHTS = {
     'Ala': 89,
     'Cys': 121,
@@ -248,6 +217,37 @@ def count_protein_molecular_weight(*seqs: str) -> dict:
                 protein_weight += aminoacid_weight
                 result[seq] = protein_weight
     return result
+    
+TRANSCRIBE_DICT = dict(A='A', U='T', G='G', C='C', a='a', u='t', g='g', c='c')
+
+def back_transcribe(*seqs: str) -> dict:
+    """
+    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
+    You can put as many arguments as you wish.
+    :return: THis function returns a dictonary, which [key] is inputed protein
+    sequence and values are DNA codons
+    """
+    result = {}
+    for seq in seqs:
+        rna = list((from_proteins_seqs_to_rna(seq)).get(seq))
+        for i in range(len(rna)):
+            if rna[i] in TRANSCRIBE_DICT.keys():
+                rna[i] = TRANSCRIBE_DICT[rna[i]]
+        result[seq] = "".join(rna)
+    return result
+    
+def count_gc_content(*seqs: str) -> dict:
+    '''
+    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
+    You can put as many arguments as you wish.
+    :return: THis function returns GC-content of DNA sequence, which encodes the protein
+    '''
+    result = {}
+    for seq in seqs:
+        dna = list((back_transcribe(seq)).get(seq))
+        gc_content = round(100 * (dna.count('G') + dna.count('C'))/len(dna))
+        result[seq] = gc_content
+    return result
 
 def main(*args: Tuple[Union[List[str], str], str]) -> dict:
     """

From 5cc5a9be2b13edcdd5b83f210e0c9ff057f69e8c Mon Sep 17 00:00:00 2001
From: Alina <alina.potyseva@gmail.com>
Date: Sat, 30 Sep 2023 20:45:44 +0300
Subject: [PATCH 18/30] changed order of functions

---
 HW4_Sapozhnikov/prototool.py | 62 ++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 3f42045..8df43c5 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -178,6 +178,37 @@ def check_input(*args: List[str]) -> Tuple[List[str],
                 return seqs_list, method, seq_on
             seq_on = None
             return seqs_list, method, seq_on
+    
+TRANSCRIBE_DICT = dict(A='A', U='T', G='G', C='C', a='a', u='t', g='g', c='c')
+
+def back_transcribe(*seqs: str) -> dict:
+    """
+    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
+    You can put as many arguments as you wish.
+    :return: THis function returns a dictonary, which [key] is inputed protein
+    sequence and values are DNA codons
+    """
+    result = {}
+    for seq in seqs:
+        rna = list((from_proteins_seqs_to_rna(seq)).get(seq))
+        for i in range(len(rna)):
+            if rna[i] in TRANSCRIBE_DICT.keys():
+                rna[i] = TRANSCRIBE_DICT[rna[i]]
+        result[seq] = "".join(rna)
+    return result
+    
+def count_gc_content(*seqs: str) -> dict:
+    '''
+    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
+    You can put as many arguments as you wish.
+    :return: THis function returns GC-content of DNA sequence, which encodes the protein
+    '''
+    result = {}
+    for seq in seqs:
+        dna = list((back_transcribe(seq)).get(seq))
+        gc_content = round(100 * (dna.count('G') + dna.count('C'))/len(dna))
+        result[seq] = gc_content
+    return result
 
 MOLECULAR_WEIGHTS = {
     'Ala': 89,
@@ -218,37 +249,6 @@ def count_protein_molecular_weight(*seqs: str) -> dict:
                 result[seq] = protein_weight
     return result
     
-TRANSCRIBE_DICT = dict(A='A', U='T', G='G', C='C', a='a', u='t', g='g', c='c')
-
-def back_transcribe(*seqs: str) -> dict:
-    """
-    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
-    You can put as many arguments as you wish.
-    :return: THis function returns a dictonary, which [key] is inputed protein
-    sequence and values are DNA codons
-    """
-    result = {}
-    for seq in seqs:
-        rna = list((from_proteins_seqs_to_rna(seq)).get(seq))
-        for i in range(len(rna)):
-            if rna[i] in TRANSCRIBE_DICT.keys():
-                rna[i] = TRANSCRIBE_DICT[rna[i]]
-        result[seq] = "".join(rna)
-    return result
-    
-def count_gc_content(*seqs: str) -> dict:
-    '''
-    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
-    You can put as many arguments as you wish.
-    :return: THis function returns GC-content of DNA sequence, which encodes the protein
-    '''
-    result = {}
-    for seq in seqs:
-        dna = list((back_transcribe(seq)).get(seq))
-        gc_content = round(100 * (dna.count('G') + dna.count('C'))/len(dna))
-        result[seq] = gc_content
-    return result
-
 def main(*args: Tuple[Union[List[str], str], str]) -> dict:
     """
     This function provides the access to the following methods:

From be1abc52bb8049bfaae5b66ca37ef2398252e3dd Mon Sep 17 00:00:00 2001
From: Nikita <nikita.sapozhnikov1@gmail.com>
Date: Sun, 1 Oct 2023 10:57:59 +0300
Subject: [PATCH 19/30] Major code review and merging all functions together

---
 HW4_Sapozhnikov/prototool.py | 546 +++++++++++++++++++----------------
 1 file changed, 290 insertions(+), 256 deletions(-)

diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py
index 7fb8327..dd2d851 100644
--- a/HW4_Sapozhnikov/prototool.py
+++ b/HW4_Sapozhnikov/prototool.py
@@ -1,10 +1,121 @@
 """
-This is a prototool.
+This is a prototool. WE ARE SORRY!!!
 """
 
 from typing import List, Optional, Tuple, Union
 
 
+AMINOACIDS_DICT = {
+    'Ala': {'TO_1': 'A',
+            'PROTEIN_TO_RNA_COMBINATION': {'GCU', 'GCC', 'GCA', 'GCG'},
+            'PKA_AMINOACIDS': [2.34, 9.69],
+            'MOLECULAR_WEIGHTS': 89},
+    'Arg': {'TO_1': 'R',
+            'PROTEIN_TO_RNA_COMBINATION': {'CGU', 'CGC', 'CGA', 'CGG', 'AGA',
+                                           'AGG'},
+            'PKA_AMINOACIDS': [2.17, 9.04, 12.68],
+            'MOLECULAR_WEIGHTS': 174},
+    'Asn': {'TO_1': 'N',
+            'PROTEIN_TO_RNA_COMBINATION': {'AAU', 'AAC'},
+            'PKA_AMINOACIDS': [1.88, 9.60, 3.65],
+            'MOLECULAR_WEIGHTS': 132},
+    'Asp': {'TO_1': 'D',
+            'PROTEIN_TO_RNA_COMBINATION': {'GAU', 'GAC'},
+            'PKA_AMINOACIDS': [1.88, 9.60, 3.65],
+            'MOLECULAR_WEIGHTS': 133},
+    'Cys': {'TO_1': 'C',
+            'PROTEIN_TO_RNA_COMBINATION': {'UGU', 'UGC'},
+            'PKA_AMINOACIDS': [1.96, 10.28, 8.18],
+            'MOLECULAR_WEIGHTS': 121},
+    'Glu': {'TO_1': 'Q',
+            'PROTEIN_TO_RNA_COMBINATION': {'GAA', 'GAG'},
+            'PKA_AMINOACIDS': [2.19, 9.67, 4.25],
+            'MOLECULAR_WEIGHTS': 147},
+    'Gln': {'TO_1': 'E',
+            'PROTEIN_TO_RNA_COMBINATION': {'CAA', 'CAG'},
+            'PKA_AMINOACIDS': [2.17, 9.13],
+            'MOLECULAR_WEIGHTS': 146},
+    'Gly': {'TO_1': 'G',
+            'PROTEIN_TO_RNA_COMBINATION': {'GGU', 'GGC', 'GGA', 'GGG'},
+            'PKA_AMINOACIDS': [2.34, 9.60],
+            'MOLECULAR_WEIGHTS': 75},
+    'His': {'TO_1': 'E',
+            'PROTEIN_TO_RNA_COMBINATION': {'CAU', 'CAC'},
+            'PKA_AMINOACIDS': [1.82, 9.17],
+            'MOLECULAR_WEIGHTS': 155},
+    'Ile': {'TO_1': 'I',
+            'PROTEIN_TO_RNA_COMBINATION': {'AUU', 'AUC', 'AUA'},
+            'PKA_AMINOACIDS': [2.36, 9.68],
+            'MOLECULAR_WEIGHTS': 131},
+    'Leu': {'TO_1': 'L',
+            'PROTEIN_TO_RNA_COMBINATION': {'CUU', 'CUC', 'CUA', 'CUG'},
+            'PKA_AMINOACIDS': [2.36, 9.60],
+            'MOLECULAR_WEIGHTS': 131},
+    'Lys': {'TO_1': 'K',
+            'PROTEIN_TO_RNA_COMBINATION': {'AAA', 'AAG'},
+            'PKA_AMINOACIDS': [2.18, 8.95, 10.53],
+            'MOLECULAR_WEIGHTS': 146},
+    'Met': {'TO_1': 'M',
+            'PROTEIN_TO_RNA_COMBINATION': {'AUG'},
+            'PKA_AMINOACIDS': [2.28, 9.21],
+            'MOLECULAR_WEIGHTS': 149},
+    'Phe': {'TO_1': 'F',
+            'PROTEIN_TO_RNA_COMBINATION': {'UUU', 'UUC'},
+            'PKA_AMINOACIDS': [2.20, 9.13],
+            'MOLECULAR_WEIGHTS': 165},
+    'Pro': {'TO_1': 'P',
+            'PROTEIN_TO_RNA_COMBINATION': {'CCU', 'CCC', 'CCA', 'CCG'},
+            'PKA_AMINOACIDS': [1.99, 10.96],
+            'MOLECULAR_WEIGHTS': 115},
+    'Ser': {'TO_1': 'S',
+            'PROTEIN_TO_RNA_COMBINATION': {'UCU', 'UCC', 'UCA', 'UCG'},
+            'PKA_AMINOACIDS': [2.21, 9.15],
+            'MOLECULAR_WEIGHTS': 105},
+    'Thr': {'TO_1': 'T',
+            'PROTEIN_TO_RNA_COMBINATION': {'ACU', 'ACC', 'ACA', 'ACG'},
+            'PKA_AMINOACIDS': [2.11, 9.62],
+            'MOLECULAR_WEIGHTS': 119},
+    'Tyr': {'TO_1': 'W',
+            'PROTEIN_TO_RNA_COMBINATION': {'UAU', 'UAC'},
+            'PKA_AMINOACIDS': [2.20, 9.11, 10.07],
+            'MOLECULAR_WEIGHTS': 181},
+    'Trp': {'TO_1': 'Y',
+            'PROTEIN_TO_RNA_COMBINATION': {'UGG'},
+            'PKA_AMINOACIDS': [2.38, 9.39],
+            'MOLECULAR_WEIGHTS': 204},
+    'Val': {'TO_1': 'V',
+            'PROTEIN_TO_RNA_COMBINATION': {'GUU', 'GUC', 'GUA', 'GUG'},
+            'PKA_AMINOACIDS': [2.32, 9.62],
+            'MOLECULAR_WEIGHTS': 117},
+}
+
+# A dictionary where keys are 1-letter and values are 3-letters codes
+TO_3_DICT = {nested_dict['TO_1']: key for key,
+             nested_dict in AMINOACIDS_DICT.items()}
+
+TRANSCRIBE_DICT: dict = {'A': 'A',
+                         'U': 'T',
+                         'G': 'G',
+                         'C': 'C',
+                         'a': 'a',
+                         'u': 't',
+                         'g': 'g',
+                         'c': 'c'}
+
+
+def is_one_letter(seq: str) -> bool:
+    """
+    Defines whether the sequence is 1 coded.
+
+    Args:
+    - seq - sequence to check
+
+    Returns:
+    - bool
+    """
+    return all(aa.isalpha() and aa.isupper() for aa in seq)
+
+
 def recode(seq: str) -> dict:
     """
     Translate 1-letter to 3-letter encoding if 1-letter
@@ -18,32 +129,18 @@ def recode(seq: str) -> dict:
     for original sequences keys
     """
 
-    TO_1_dict = {
-        'Ala': 'A', 'Arg': 'R', 'Asn': 'N', 'Asp': 'D',
-        'Cys': 'C', 'Gln': 'Q', 'Glu': 'E', 'Gly': 'G',
-        'His': 'H', 'Ile': 'I', 'Leu': 'L', 'Lys': 'K',
-        'Met': 'M', 'Phe': 'F', 'Pro': 'P', 'Ser': 'S',
-        'Thr': 'T', 'Trp': 'W', 'Tyr': 'Y', 'Val': 'V'
-    }
-
-    TO_3_dict = {v: k for k, v in TO_1_dict.items()}
-
-    # Check if the input sequence is in 1-letter or 3-letter format
-    is_one_letter = all(aa.isalpha() and aa.isupper() for aa in seq)
-
-    if is_one_letter:
+    if is_one_letter(seq):
         # Translate 1-letter to 3-letter coded sequence
         three_letter_sequence = ""
         for aa in seq:
-            three_letter_code = TO_3_dict.get(aa, aa)
+            three_letter_code = TO_3_DICT.get(aa, aa)
             three_letter_sequence += three_letter_code
         return three_letter_sequence
     # Translate 3-letter to 1-letter coded sequence
     one_letter_sequence = ""
     for aa in range(0, len(seq), 3):
         amino_acid = seq[aa:aa+3]
-        one_letter_sequence += TO_1_dict.get(amino_acid,
-                                             amino_acid)
+        one_letter_sequence += AMINOACIDS_DICT[amino_acid]['TO_1']
     return one_letter_sequence
 
 
@@ -71,7 +168,7 @@ def prettify_alignment(aligned_seq_on: str, aligned_seq2: str) -> None:
 
 
 def local_alignment(seq_on: str,
-                    seq2: Union[List[str], str],
+                    seq2: str,
                     alignment_dict: dict,
                     seq_id: int,
                     match=2,
@@ -191,269 +288,178 @@ def local_alignment(seq_on: str,
     return alignment_dict
 
 
-def check_input(*args: List[str]) -> Tuple[List[str],
-                                           str,
-                                           Optional[str]]:
-    """
-    Function to check the validity of the input.
-
-    Args:
-    *args - are supposed to be all sequences to process and the method to
-    process with.
-    The method is supposed to be the last argument.
-
-    Returns:
-    - seqs_list - list of sequences
-    - method - a chosen method to use
-    - seq_on (optional) - in case of local_alignment method
-    """
-
-    if len(args) < 1:
-        # Handle the case where there are no arguments
-        raise ValueError("No input defined.")
-    else:
-        # Check the last element of the input is a valid method
-        method = args[-1]
-        if method not in ['recode',
-                          'local_alignment',
-                          'from_proteins_seqs_to_rna',
-                          'isoelectric_point_determination',
-                          '']:
-            raise ValueError(method, " is not a valid method.")
-        else:
-            # Form a list with sequences from the input
-            seqs_list = list(args[:-1])
-            if method == 'local_alignment':
-                seq_on = seqs_list.pop(0)
-                return seqs_list, method, seq_on
-            seq_on = None
-            return seqs_list, method, seq_on
-    
-TRANSCRIBE_DICT = dict(A='A', U='T', G='G', C='C', a='a', u='t', g='g', c='c')
-def back_transcribe(*seqs: str) -> dict:
-    """
-    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
-    You can put as many arguments as you wish.
-    :return: THis function returns a dictonary, which [key] is inputed protein
-    sequence and values are DNA codons
-    """
-    result = {}
-    for seq in seqs:
-        rna = list((from_proteins_seqs_to_rna(seq)).get(seq))
-        for i in range(len(rna)):
-            if rna[i] in TRANSCRIBE_DICT.keys():
-                rna[i] = TRANSCRIBE_DICT[rna[i]]
-        result[seq] = "".join(rna)
-    return result
-    
-def count_gc_content(*seqs: str) -> dict:
-    '''
-    :param seqs: Seqs is an argument of the function. It is a string without whitespace.
-    You can put as many arguments as you wish.
-    :return: THis function returns GC-content of DNA sequence, which encodes the protein
-    '''
-    result = {}
-    for seq in seqs:
-        dna = list((back_transcribe(seq)).get(seq))
-        gc_content = round(100 * (dna.count('G') + dna.count('C'))/len(dna))
-        result[seq] = gc_content
-    return result
-
-MOLECULAR_WEIGHTS = {
-    'Ala': 89,
-    'Cys': 121,
-    'Asp': 133,
-    'Glu': 147,
-    'Phe': 165,
-    'Gly': 75,
-    'His': 155,
-    'Ile': 131,
-    'Lys': 146,
-    'Leu': 131,
-    'Met': 149,
-    'Asn': 132,
-    'Pro': 115,
-    'Gln': 146,
-    'Arg': 174,
-    'Ser': 105,
-    'Thr': 119,
-    'Val': 117,
-    'Trp': 204,
-    'Tyr': 181}
-
-def count_protein_molecular_weight(*seqs: str) -> dict:
+def count_protein_molecular_weight(*seqs_list: Union[List[str], str]) -> dict:
     """
-    :param seqs: Seqs is an argument of the function. It is a string without whitespace
-    (f.g. 'AlaSer'). You can put as many arguments as you wish.
+    :param seqs_list: seqs_list is a list of strings without whitespace
+    (e.g. 'AlaSer'). You can put as many sequences as you wish.
     :return: This function returns molecular weight of the protein.
     """
     result = {}
-    for seq in seqs:
+    for seq in seqs_list:
         protein_weight = 0
         aminoacids = [seq[i:i + 3] for i in range(0, len(seq), 3)]
-        for i in range(len(aminoacids)):
-            if aminoacids[i] in MOLECULAR_WEIGHTS.keys():
-                aminoacid_weight = MOLECULAR_WEIGHTS[aminoacids[i]]
+        for i, aminoacid in enumerate(aminoacids):
+            if aminoacid in AMINOACIDS_DICT.keys():
+                aminoacid_weight = (AMINOACIDS_DICT[aminoacid]
+                                    ['MOLECULAR_WEIGHTS'])
                 protein_weight += aminoacid_weight
                 result[seq] = protein_weight
     return result
-    
-def main(*args: Tuple[Union[List[str], str], str]) -> dict:
-    """
-    This function provides the access to the following methods:
-    1. Local Alignment of two sequences - the last argument: 'local_alignment'
-       - needs at least 2 protein sequences 1-letter encoded.
-       When more than 2 sequences are passed, uses the first
-       entered sequence to align the rest on
-       - performs an alignment using Smith-Waterman algorithm
-    2. ...
-    3. ...
-    4. ...
-    5. ...
 
-    Args:
-    *args - are supposed to be all sequences to process and the method
-    to process with.
-    The method is supposed to be the last argument
-    To get started choose one of the possible programms to run:
-    1. Local alignment
-    Enter two protein sequences in 1- letter encoding. The code will return alignment scores and 
-    sequences aligned on each other. 
-    2. Call method
 
-    Returns:
-    function_result - result of a chosen function
+def from_proteins_seqs_to_rna(*seqs_list: Union[List[str], str]) -> dict:
     """
-
-    seqs_list, method, seq_on = check_input(*args)
-    print(seqs_list, method, seq_on)
-
-    match method:
-
-        case 'local_alignment':
-
-            alignment_dict: dict = {}
-            for seq_id, seq in enumerate(seqs_list):
-                function_result = local_alignment(seq_on=seq_on,
-                                                  seq2=seq,
-                                                  alignment_dict=alignment_dict,
-                                                  seq_id=seq_id,
-                                                  prettify=True)
-
-        case '':
-
-            pass
-
-        case _:
-
-            function_result = None
-
-    return function_result
-
-def from_proteins_seqs_to_rna(*seqs: str) -> dict:
-    """
-    :param seqs: strings with type 'ValTyrAla','AsnAspCys'.
-    seqs is args parameter, so you can pass more than one
-    sequences at the time.
+    :param seqs_list: a list of strings with type 'ValTyrAla','AsnAspCys'.
+    You can pass more than one sequence at the time.
     :return: dictionary, where [key] is your input protein sequences
     and values are combinations of RNA codones, which encode this protein
     """
-    PROTEIN_TO_RNA_COMBINATION = {
-        'Ala': {'GCU', 'GCC', 'GCA', 'GCG'},
-        'Arg': {'CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'},
-        'Asn': {'AAU', 'AAC'},
-        'Asp': {'GAU', 'GAC'},
-        'Cys': {'UGU', 'UGC'},
-        'Glu': {'GAA', 'GAG'},
-        'Gln': {'CAA', 'CAG'},
-        'Gly': {'GGU', 'GGC', 'GGA', 'GGG'},
-        'His': {'CAU', 'CAC'},
-        'Ile': {'AUU', 'AUC', 'AUA'},
-        'Leu': {'CUU', 'CUC', 'CUA', 'CUG'},
-        'Lys': {'AAA', 'AAG'},
-        'Met': {'AUG'},
-        'Phe': {'UUU', 'UUC'},
-        'Pro': {'CCU', 'CCC', 'CCA', 'CCG'},
-        'Ser': {'UCU', 'UCC', 'UCA', 'UCG'},
-        'Thr': {'ACU', 'ACC', 'ACA', 'ACG'},
-        'Tyr': {'UAU', 'UAC'},
-        'Trp': {'UGG'},
-        'Val': {'GUU', 'GUC', 'GUA', 'GUG'},
-    }
+
     answer_dictionary = {}
-    for aminoacids in seqs:
+    for seq in seqs_list:
+        
         rna_combination = ''
-        divided_acids = [aminoacids[i:i + 3] for i in range(0,
-                                                            len(aminoacids),
-                                                            3)]
+        divided_acids = [seq[i:i + 3] for i in range(0,
+                                                     len(seq),
+                                                     3)]
         for divided_acid in divided_acids:
-            if divided_acid in PROTEIN_TO_RNA_COMBINATION.keys():
-                rna_combination += next(iter(PROTEIN_TO_RNA_COMBINATION[divided_acid]))
+            
+            if divided_acid in AMINOACIDS_DICT.keys():
+                rna_combination += next(iter(AMINOACIDS_DICT[divided_acid]
+                                             ['PROTEIN_TO_RNA_COMBINATION']))
             else:
                 raise ValueError('Non-protein aminoacids in sequence')
-        answer_dictionary[aminoacids] = rna_combination
+        answer_dictionary[seq] = rna_combination
     return answer_dictionary
 
 
-def isoelectric_point_determination(*seqs: str) -> dict:
+def isoelectric_point_determination(*seqs_list: Union[List[str], str]) -> dict:
     """
-    :param seqs: strings with type 'ValTyrAla','AsnAspCys'.
-    seqs is args parameter, so you can pass more than one
-    sequences at a time.
+    :param seqs_list: a list of strings with type 'ValTyrAla','AsnAspCys'.
+    You can pass more than one sequence at a time.
     :return: dictionary, where [key] is your input protein sequence and value
     is an isoelectric point of your input proteins
     """
-    PKA_AMINOACIDS = {
-        'Ala': [2.34, 9.69],
-        'Arg': [2.17, 9.04, 12.68],
-        'Asn': [1.88, 9.60, 3.65],
-        'Asp': [1.88, 9.60, 3.65],
-        'Cys': [1.96, 10.28, 8.18],
-        'Glu': [2.19, 9.67, 4.25],
-        'Gln': [2.17, 9.13],
-        'Gly': [2.34, 9.60],
-        'His': [1.82, 9.17],
-        'Ile': [2.36, 9.68],
-        'Leu': [2.36, 9.60],
-        'Lys': [2.18, 8.95, 10.53],
-        'Met': [2.28, 9.21],
-        'Phe': [2.20, 9.13],
-        'Pro': [1.99, 10.96],
-        'Ser': [2.21, 9.15],
-        'Thr': [2.11, 9.62],
-        'Tyr': [2.20, 9.11, 10.07],
-        'Trp': [2.38, 9.39],
-        'Val': [2.32, 9.62],
-    }
-
     answer_dictionary = {}
 
-    for aminoacids in seqs:
-        divided_acids = [aminoacids[i:i + 3] for i in range(0, len(aminoacids), 3)]
+    for aminoacids in seqs_list:
+        divided_acids = [aminoacids[i:i + 3] for i in range(0,
+                                                            len(aminoacids),
+                                                            3)]
         for divided_acid in divided_acids:
-            if divided_acid not in PKA_AMINOACIDS.keys():
+            if divided_acid not in AMINOACIDS_DICT.keys():
                 raise ValueError('Non-protein aminoacids in sequence')
 
         isoelectric_point_mean = 0
         count_groups = 0
-        for acid_index in range(0, len(divided_acids)):
+        for acid_index, aminoacid in enumerate(divided_acids):
             if acid_index == 0:
                 isoelectric_point_mean\
-                    += PKA_AMINOACIDS[divided_acids[acid_index]][0]
+                    += (AMINOACIDS_DICT[aminoacid]['PKA_AMINOACIDS'][0])
                 count_groups += 1
             elif acid_index == len(divided_acids) - 1:
                 isoelectric_point_mean = (isoelectric_point_mean
-                                          + PKA_AMINOACIDS[divided_acids[acid_index]][-1])
+                                          + (AMINOACIDS_DICT[aminoacid]
+                                             ['PKA_AMINOACIDS'][-1]))
                 count_groups += 1
             else:
-                if len(PKA_AMINOACIDS[divided_acids[acid_index]]) > 2:
+                if len(AMINOACIDS_DICT[aminoacid]['PKA_AMINOACIDS']) > 2:
                     isoelectric_point_mean = (isoelectric_point_mean
-                                              + PKA_AMINOACIDS[divided_acids[acid_index]][1])
+                                              + (AMINOACIDS_DICT[aminoacid]
+                                                 ['PKA_AMINOACIDS'][1]))
                     count_groups += 1
         answer_dictionary[aminoacids] = isoelectric_point_mean / count_groups
     return answer_dictionary
 
-def main(*args: Tuple[Union[List[str], str], str]) -> dict:
+
+def back_transcribe(*seqs_list: Union[List[str], str]) -> dict:
+    """
+    :param seqs_list: is a list of strings without whitespace. 
+    You can put as many sequences as you wish.
+    :return: This function returns a dictonary where key is inputed protein
+    sequence and values are DNA codons
+    """
+    result = {}
+    for seq in seqs_list:
+        rna = list((from_proteins_seqs_to_rna(seq)).get(seq))
+        for i in range(len(rna)):
+            if rna[i] in TRANSCRIBE_DICT.keys():
+                rna[i] = TRANSCRIBE_DICT[rna[i]]
+        result[seq] = "".join(rna)
+    return result
+
+
+def count_gc_content(*seqs_list: Union[List[str], str]) -> dict:
+    """
+    :param seqs_list: is a list of strings without whitespace. 
+    You can put as many sequences as you wish.
+    :return: This function returns GC-content of DNA sequence, which encodes
+    the protein
+    """
+    result = {}
+    for seq in seqs_list:
+        dna = list((back_transcribe(seq)).get(seq))
+        gc_content = round(100 * (dna.count('G') + dna.count('C'))/len(dna))
+        result[seq] = gc_content
+    return result
+
+
+def check_input(*args: Union[List[str], str], method: str) -> \
+                                    Tuple[List[str], Optional[str]]:
+    """
+    Function to check the validity of the input.
+
+    Args:
+    - *args - are supposed to be all sequences to process
+    - method - the method to process with method
+
+    Returns:
+    - seqs_list - list of sequences
+    - seq_on (optional) - in case of local_alignment method
+    """
+
+    if len(args) == 0:
+        # Handle the case where there are no arguments
+        raise ValueError('No input defined.')
+    else:
+        if method not in ['recode',
+                          'local_alignment',
+                          'from_proteins_seqs_to_rna',
+                          'isoelectric_point_determination',
+                          'count_protein_molecular_weight',
+                          'back_transcribe',
+                          'count_gc_content']:
+            raise ValueError(method, ' is not a valid method.')
+        else:
+            # Form a list with sequences from the input
+            seqs_list = list(args)
+            if method == 'local_alignment':
+                if len(seqs_list) < 2:
+                    raise IndexError('Need at least two sequences to align.')
+                for i, seq in enumerate(seqs_list):
+                    if not is_one_letter(seq):
+                        print('Warning! Function local_alignment() needs '
+                              '1-letter encoded sequences. Your sequence '
+                              'will be mutated to a 1-letter encoding.')
+                        seqs_list[i] = recode(seq)
+                        print(seq, ' sequence has been mutated into: ',
+                              seqs_list[i])
+                        seq_on = seqs_list.pop(0)
+                return seqs_list, seq_on
+            for i, seq in enumerate(seqs_list):
+                if is_one_letter(seq):
+                    print(f'Warning! Function {method}() needs '
+                          '3-letter encoded sequences. Your sequence '
+                          'will be mutated to a 3-letter encoding.')
+                    seqs_list[i] = recode(seq)
+                    print(seq, ' sequence has been mutated into: ',
+                          seqs_list[i])
+            seq_on = None
+            return seqs_list, seq_on
+
+
+def main(*args: Tuple[Union[List[str], str]],
+         method: Optional[str] = None) -> dict:
     """
     This function provides the access to the following methods:
 
@@ -470,8 +476,8 @@ def main(*args: Tuple[Union[List[str], str], str]) -> dict:
        entered sequence to align the rest on
        - performs an alignment using Smith-Waterman algorithm
 
-    3. Find all possible RNA sequences for defined protein sequence - the
-    last argument: from_proteins_seqs_to_rna
+    3. Find possible RNA sequences for defined protein sequence - the
+    last argument: 'from_proteins_seqs_to_rna'
         - needs at least 1 protein sequence 3-letter encoded
         - returns a dictionary, where key is your input protein sequences
         and values are combinations of RNA codones, which encode this protein
@@ -483,19 +489,37 @@ def main(*args: Tuple[Union[List[str], str], str]) -> dict:
         - returns a dictionary, where key is your input protein sequence and
         value is an isoelectric point of this protein
 
-    4. ...
-    5. ...
+    5. Calculate protein molecular weight - the last argument:
+    'count_protein_molecular_weight'
+        - Seqs is an argument of the function. It is a string without
+    whitespace (e.g. 'AlaSer'). You can put as many arguments as you wish.
+        - returns a dictionary with protein sequences as keys and their
+        calculated molecular weight as corresponding values
+
+    6. Determine possible DNA sequence from protein sequence - the last
+    argument: 'back_transcribe'
+        - needs a string without whitespaces. You can put as many arguments as
+        you wish.
+        - returns a dictonary where keys are inputed protein sequences and
+        corresponding values are possible DNA codons
+
+    7. Calculate a GC ratio in a possible DNA sequence of a given aminoacid
+    sequence - the last argument 'count_gc_content'
+        - needs a string without whitespaces. You can put as many sequences
+        as you wish.
+        - returns a dictionary where keys are inputed aminoacid sequences and
+        GC-content of DNA sequence, which encodes the protein are
+        corresponding values
 
     Args:
-    *args - are supposed to be all sequences to process and the method
-    to process with.
-    The method is supposed to be the last argument.
+    - *args - are supposed to be all sequences to process
+    - method is a kwarg - the method to process with.
 
     Returns:
-    function_result - result of a chosen function
+    function_result - a dictionary with the result of a chosen function
     """
 
-    seqs_list, method, seq_on = check_input(*args)
+    seqs_list, seq_on = check_input(*args, method=method)
     print(f'Your sequences are: {seqs_list}',
           f'The method is: {method}', sep='\n')
 
@@ -510,21 +534,31 @@ def main(*args: Tuple[Union[List[str], str], str]) -> dict:
 
         case 'local_alignment':
 
-            print('The sequence align on: ', seq_on)
             alignment_dict: dict = {}
             for seq_id, seq in enumerate(seqs_list):
-                function_result = local_alignment(seq_on=seq_on,
-                                                  seq2=seq,
-                                                  alignment_dict=alignment_dict,
-                                                  seq_id=seq_id,
-                                                  prettify=True)
+                local_alignment(seq_on=seq_on,
+                                seq2=seq,
+                                alignment_dict=alignment_dict,
+                                seq_id=seq_id,
+                                prettify=True)
+            return alignment_dict
+
+        case 'from_proteins_seqs_to_rna':
+
+            return from_proteins_seqs_to_rna(*seqs_list)
+
+        case 'count_protein_molecular_weight':
+
+            return count_protein_molecular_weight(*seqs_list)
+
+        case 'isoelectric_point_determination':
 
-        case '':
+            return isoelectric_point_determination(*seqs_list)
 
-            pass
+        case 'back_transcribe':
 
-        case _:
+            return back_transcribe(*seqs_list)
 
-            function_result = None
+        case 'count_gc_content':
 
-    return function_result
+            return count_gc_content(*seqs_list)

From 05caf3c398e113823b6faa4eb3513d29d2330134 Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Sun, 1 Oct 2023 12:13:14 +0300
Subject: [PATCH 20/30] Update README.md

---
 HW4_Sapozhnikov/README.md | 81 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 3 deletions(-)

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
index 8a4222d..886e4e5 100644
--- a/HW4_Sapozhnikov/README.md
+++ b/HW4_Sapozhnikov/README.md
@@ -2,18 +2,93 @@
 > *This is the repo for the fourth homework of the BI Python 2023 course*
 
 ### Title
+'prototool.py' is a special script for working with polyaminoacid sequences
 
-### Overview 
+### Overview
+'prototool.py' includes 7 methods to treatment of polyaminoacid sequences.
+'prototool.py' can be used for the next goals:
+- recoding 1-letter coded polyaminoacid seqeunces into 3-letter coded and vice versa;
+- polyaminoacid sequences aligment with Smith-Waterman algorithm [^1];
+- finding possinle RNA sequences for given polyaminoacid sequences;
+- determining polyaminoacid isoelectric point;
+- calculating polyaminoacid molecular weight;
+- finding possinle DNA sequences for given polyaminoacid sequences; 
+- determining GC-content of a corresponding DNA sequence to a given polyaminoacid sequence
 
 ### Usage
+This tool can be used both standalone and as module.
+- to use 'prototools' standalone you will have to add these lines in the code
+  ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/5fa3cf7f-e6f3-4294-9e81-b1ebe17c8514)
+  - where *args are sequences you want to process and method is a specified algorithm to use
+  - your result will be written in a variable (test on a picture)
+- to use 'prototools' as module (recomended) you should import it as any other module (check the path: prototools.py should be in the same directory as your script). Then you can freely use any of its functions (see examples).
 
 ### Options
+Arguments:
+- '''*args[str]''' sequences to work with. You can pass several arguments into all functions
+- method - a method to use
+***
+output: All functions return a '''dictionary''' , where keys are original sequenses, values are results after using a corresponding method.
 
 ### Examples
+***
+def recode allows to translate 1-letter to 3-letters polyaminoacids code
+- '''main('AlaValTyr', 'DNT', method = 'recode')'''
+- '''recode('AlaValTyr', 'DNT')'''
+- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/117befa5-feaa-433a-9ac9-23cffe9b024f)
+
+***
+def local_alignmen perform a local alignment of 2 given sequences. Needs at least two sequences to be passed
+- '''main('MetAsnTrp', 'MNT', method='local_alignment')'''
+- '''local_alignmen('MetAsnTrp', 'MNT')'''
+- Note that local_alignment function has a flag prettify (default = True) that prints out aligned sequences on each another
+- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/4dd36d24-a177-4419-9053-a5e2923a980c)
+
+***
+def from_proteins_seqs_to_rna allows to decode polyaminoacid sequences in RNA sequences
+- '''main('AlaValTyr', 'DNT', method = 'from_proteins_seqs_to_rna')'''
+- '''from_proteins_seqs_to_rna('AlaValTyr', 'DNT')'''
+- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/9ee92d0d-68a4-471b-b65a-2fa6b46ab844)
+
+***
+def isoelectric_point_determination allows to determine isoelectric point of polyaminoacid sequences 
+- '''main('AlaValTyr', 'DNT', method = 'isoelectric_point_determination')'''
+- '''isoelectric_point_determination('AlaValTyr', 'DNT')'''
+- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/24027a07-b20b-42d4-bb10-4ca7189038d4)
+
+***
+def back_transcribe allows to decode polyaminoacid sequences in DNA sequences
+- '''main('AlaValTyr', 'DNT', method = 'back_transcribe')'''
+- '''back_transcribe('AlaValTyr', 'DNT')'''
+- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/71f07616-a37d-48da-9e63-82b81836b9d7)
+
+***
+def count_gc_content allows to count the ratio of GC in the entire DNA sequence
+- '''main('AlaValTyr', 'DNT', method = 'count_gc_content')'''
+- '''count_gc_content('AlaValTyr', 'DNT')'''
+- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/d2705714-a3e8-4054-8998-61d922a4feb6)
+
+***
+def count_protein_molecular_weight allows to calculate the molecular weight of the polyaminoacid
+- '''main('AlaValTyr', 'DNT', method = 'count_protein_molecular_weight')'''
+- '''count_protein_molecular_weight('AlaValTyr', 'DNT')'''
+- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/cc1eff9a-1b39-4232-98e4-80f622101083)
 
 ### Troubleshooting
+If you have '''ValueError("No input defined.")''' it means, that you have an empty input. Please, enter the correct input. 
+***
+If you have '''ValueError(method, " is not a valid method.")''' it means, that your tool is not correct. Please, enter the right tool.
+***
+If you have '''ValueError('Non-protein aminoacids in sequence')''' it means, that your sequences contain non-protein aminoacids. Please, check your sequences and enter the correct input. 
+
+### References
+[^1]: T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology.
 
-### Contacts
+### Contributions and contacts
 
 Feel free to report any bugs and problems encountered.
-Email: nikita.sapozhnikov1@gmail.com
\ No newline at end of file
+Email: nikita.sapozhnikov1@gmail.com developed recode(), prettify_alignment(), local_alignmen(), check_input()
+***
+nekrasovadasha22@mail.ru developed from_proteins_seqs_to_rna(), isoelectric_point_determination()
+*** 
+alina.potyseva@gmail.com developed back_transcribe(), count_gc_content(), count_protein_molecular_weight()

From 1bdbb2f8cb8ed92223b464fe2f5dff44dc22b029 Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Sun, 1 Oct 2023 12:16:52 +0300
Subject: [PATCH 21/30] Update README.md

---
 HW4_Sapozhnikov/README.md | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
index 886e4e5..8c4564f 100644
--- a/HW4_Sapozhnikov/README.md
+++ b/HW4_Sapozhnikov/README.md
@@ -4,6 +4,8 @@
 ### Title
 'prototool.py' is a special script for working with polyaminoacid sequences
 
+***
+
 ### Overview
 'prototool.py' includes 7 methods to treatment of polyaminoacid sequences.
 'prototool.py' can be used for the next goals:
@@ -15,6 +17,8 @@
 - finding possinle DNA sequences for given polyaminoacid sequences; 
 - determining GC-content of a corresponding DNA sequence to a given polyaminoacid sequence
 
+***
+
 ### Usage
 This tool can be used both standalone and as module.
 - to use 'prototools' standalone you will have to add these lines in the code
@@ -23,57 +27,63 @@ This tool can be used both standalone and as module.
   - your result will be written in a variable (test on a picture)
 - to use 'prototools' as module (recomended) you should import it as any other module (check the path: prototools.py should be in the same directory as your script). Then you can freely use any of its functions (see examples).
 
+***
+
 ### Options
 Arguments:
 - '''*args[str]''' sequences to work with. You can pass several arguments into all functions
 - method - a method to use
+
+output: All functions return a dict, where keys are original sequenses, values are results after using a corresponding method.
+
 ***
-output: All functions return a '''dictionary''' , where keys are original sequenses, values are results after using a corresponding method.
 
 ### Examples
-***
+
 def recode allows to translate 1-letter to 3-letters polyaminoacids code
 - '''main('AlaValTyr', 'DNT', method = 'recode')'''
 - '''recode('AlaValTyr', 'DNT')'''
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/117befa5-feaa-433a-9ac9-23cffe9b024f)
-
 ***
+
 def local_alignmen perform a local alignment of 2 given sequences. Needs at least two sequences to be passed
 - '''main('MetAsnTrp', 'MNT', method='local_alignment')'''
 - '''local_alignmen('MetAsnTrp', 'MNT')'''
 - Note that local_alignment function has a flag prettify (default = True) that prints out aligned sequences on each another
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/4dd36d24-a177-4419-9053-a5e2923a980c)
-
 ***
+
 def from_proteins_seqs_to_rna allows to decode polyaminoacid sequences in RNA sequences
 - '''main('AlaValTyr', 'DNT', method = 'from_proteins_seqs_to_rna')'''
 - '''from_proteins_seqs_to_rna('AlaValTyr', 'DNT')'''
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/9ee92d0d-68a4-471b-b65a-2fa6b46ab844)
-
 ***
+
 def isoelectric_point_determination allows to determine isoelectric point of polyaminoacid sequences 
 - '''main('AlaValTyr', 'DNT', method = 'isoelectric_point_determination')'''
 - '''isoelectric_point_determination('AlaValTyr', 'DNT')'''
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/24027a07-b20b-42d4-bb10-4ca7189038d4)
-
 ***
+
 def back_transcribe allows to decode polyaminoacid sequences in DNA sequences
 - '''main('AlaValTyr', 'DNT', method = 'back_transcribe')'''
 - '''back_transcribe('AlaValTyr', 'DNT')'''
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/71f07616-a37d-48da-9e63-82b81836b9d7)
-
 ***
+
 def count_gc_content allows to count the ratio of GC in the entire DNA sequence
 - '''main('AlaValTyr', 'DNT', method = 'count_gc_content')'''
 - '''count_gc_content('AlaValTyr', 'DNT')'''
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/d2705714-a3e8-4054-8998-61d922a4feb6)
-
 ***
+
 def count_protein_molecular_weight allows to calculate the molecular weight of the polyaminoacid
 - '''main('AlaValTyr', 'DNT', method = 'count_protein_molecular_weight')'''
 - '''count_protein_molecular_weight('AlaValTyr', 'DNT')'''
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/cc1eff9a-1b39-4232-98e4-80f622101083)
 
+***
+
 ### Troubleshooting
 If you have '''ValueError("No input defined.")''' it means, that you have an empty input. Please, enter the correct input. 
 ***
@@ -81,9 +91,14 @@ If you have '''ValueError(method, " is not a valid method.")''' it means, that y
 ***
 If you have '''ValueError('Non-protein aminoacids in sequence')''' it means, that your sequences contain non-protein aminoacids. Please, check your sequences and enter the correct input. 
 
+***
+
 ### References
+
 [^1]: T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology.
 
+***
+
 ### Contributions and contacts
 
 Feel free to report any bugs and problems encountered.

From 3d76bb5c66660aed840b581f1df6937dc12ecb8e Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Sun, 1 Oct 2023 12:19:41 +0300
Subject: [PATCH 22/30] Update README.md

---
 HW4_Sapozhnikov/README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
index 8c4564f..ff2f0dc 100644
--- a/HW4_Sapozhnikov/README.md
+++ b/HW4_Sapozhnikov/README.md
@@ -10,7 +10,7 @@
 'prototool.py' includes 7 methods to treatment of polyaminoacid sequences.
 'prototool.py' can be used for the next goals:
 - recoding 1-letter coded polyaminoacid seqeunces into 3-letter coded and vice versa;
-- polyaminoacid sequences aligment with Smith-Waterman algorithm [^1];
+- polyaminoacid sequences aligment with Smith-Waterman algorithm [[1]](#ref1);
 - finding possinle RNA sequences for given polyaminoacid sequences;
 - determining polyaminoacid isoelectric point;
 - calculating polyaminoacid molecular weight;
@@ -95,8 +95,9 @@ If you have '''ValueError('Non-protein aminoacids in sequence')''' it means, tha
 
 ### References
 
-[^1]: T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology.
+1. T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology.
 
+[1]: #ref1
 ***
 
 ### Contributions and contacts

From f32641a5420a852ca42c147463217dc5a139202a Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Sun, 1 Oct 2023 12:21:22 +0300
Subject: [PATCH 23/30] Update README.md

---
 HW4_Sapozhnikov/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
index ff2f0dc..59bfc6d 100644
--- a/HW4_Sapozhnikov/README.md
+++ b/HW4_Sapozhnikov/README.md
@@ -10,7 +10,7 @@
 'prototool.py' includes 7 methods to treatment of polyaminoacid sequences.
 'prototool.py' can be used for the next goals:
 - recoding 1-letter coded polyaminoacid seqeunces into 3-letter coded and vice versa;
-- polyaminoacid sequences aligment with Smith-Waterman algorithm [[1]](#ref1);
+- polyaminoacid sequences aligment with Smith-Waterman algorithm [^1];
 - finding possinle RNA sequences for given polyaminoacid sequences;
 - determining polyaminoacid isoelectric point;
 - calculating polyaminoacid molecular weight;
@@ -95,7 +95,7 @@ If you have '''ValueError('Non-protein aminoacids in sequence')''' it means, tha
 
 ### References
 
-1. T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology.
+- T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology.
 
 [1]: #ref1
 ***

From 9d6f68726d5ab1bb680c9b24fe57aea86290e69e Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Sun, 1 Oct 2023 12:22:00 +0300
Subject: [PATCH 24/30] Update README.md

---
 HW4_Sapozhnikov/README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
index 59bfc6d..e84e6af 100644
--- a/HW4_Sapozhnikov/README.md
+++ b/HW4_Sapozhnikov/README.md
@@ -97,7 +97,6 @@ If you have '''ValueError('Non-protein aminoacids in sequence')''' it means, tha
 
 - T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology.
 
-[1]: #ref1
 ***
 
 ### Contributions and contacts

From 9563f394772767988e4c3e77a9fe8d1cc6caa59b Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Sun, 1 Oct 2023 12:22:39 +0300
Subject: [PATCH 25/30] Update README.md

---
 HW4_Sapozhnikov/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
index e84e6af..8c4564f 100644
--- a/HW4_Sapozhnikov/README.md
+++ b/HW4_Sapozhnikov/README.md
@@ -95,7 +95,7 @@ If you have '''ValueError('Non-protein aminoacids in sequence')''' it means, tha
 
 ### References
 
-- T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology.
+[^1]: T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology.
 
 ***
 

From 597f21b47492b0e145c4423058cd5549d95db293 Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Sun, 1 Oct 2023 12:23:03 +0300
Subject: [PATCH 26/30] Update README.md

---
 HW4_Sapozhnikov/README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
index 8c4564f..e23bff4 100644
--- a/HW4_Sapozhnikov/README.md
+++ b/HW4_Sapozhnikov/README.md
@@ -93,12 +93,6 @@ If you have '''ValueError('Non-protein aminoacids in sequence')''' it means, tha
 
 ***
 
-### References
-
-[^1]: T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology.
-
-***
-
 ### Contributions and contacts
 
 Feel free to report any bugs and problems encountered.
@@ -107,3 +101,9 @@ Email: nikita.sapozhnikov1@gmail.com developed recode(), prettify_alignment(), l
 nekrasovadasha22@mail.ru developed from_proteins_seqs_to_rna(), isoelectric_point_determination()
 *** 
 alina.potyseva@gmail.com developed back_transcribe(), count_gc_content(), count_protein_molecular_weight()
+
+***
+
+### References
+
+[^1]: T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology.

From f6a34ef821052d1c790911ac5b410b5062e88bfb Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Sun, 1 Oct 2023 12:26:37 +0300
Subject: [PATCH 27/30] Update README.md

---
 HW4_Sapozhnikov/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
index e23bff4..21ca04e 100644
--- a/HW4_Sapozhnikov/README.md
+++ b/HW4_Sapozhnikov/README.md
@@ -31,8 +31,8 @@ This tool can be used both standalone and as module.
 
 ### Options
 Arguments:
-- '''*args[str]''' sequences to work with. You can pass several arguments into all functions
-- method - a method to use
+- '*args[str]' sequences to work with. You can pass several arguments into all functions
+- 'method' - a method to use
 
 output: All functions return a dict, where keys are original sequenses, values are results after using a corresponding method.
 
@@ -41,8 +41,8 @@ output: All functions return a dict, where keys are original sequenses, values a
 ### Examples
 
 def recode allows to translate 1-letter to 3-letters polyaminoacids code
-- '''main('AlaValTyr', 'DNT', method = 'recode')'''
-- '''recode('AlaValTyr', 'DNT')'''
+- 'main('AlaValTyr', 'DNT', method = 'recode')'
+- 'recode('AlaValTyr', 'DNT')'
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/117befa5-feaa-433a-9ac9-23cffe9b024f)
 ***
 

From f6e10e8622b028e9c9793f3411b90920a1b43e22 Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Sun, 1 Oct 2023 12:30:55 +0300
Subject: [PATCH 28/30] Update README.md

---
 HW4_Sapozhnikov/README.md | 48 +++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
index 21ca04e..0c7f9d2 100644
--- a/HW4_Sapozhnikov/README.md
+++ b/HW4_Sapozhnikov/README.md
@@ -2,13 +2,13 @@
 > *This is the repo for the fourth homework of the BI Python 2023 course*
 
 ### Title
-'prototool.py' is a special script for working with polyaminoacid sequences
+`prototool.py` is a special script for working with polyaminoacid sequences
 
 ***
 
 ### Overview
-'prototool.py' includes 7 methods to treatment of polyaminoacid sequences.
-'prototool.py' can be used for the next goals:
+`prototool.py` includes 7 methods to treatment of polyaminoacid sequences.
+`prototool.py` can be used for the next goals:
 - recoding 1-letter coded polyaminoacid seqeunces into 3-letter coded and vice versa;
 - polyaminoacid sequences aligment with Smith-Waterman algorithm [^1];
 - finding possinle RNA sequences for given polyaminoacid sequences;
@@ -21,18 +21,18 @@
 
 ### Usage
 This tool can be used both standalone and as module.
-- to use 'prototools' standalone you will have to add these lines in the code
+- to use `prototool` standalone, you will have to add these lines to the code
   ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/5fa3cf7f-e6f3-4294-9e81-b1ebe17c8514)
   - where *args are sequences you want to process and method is a specified algorithm to use
   - your result will be written in a variable (test on a picture)
-- to use 'prototools' as module (recomended) you should import it as any other module (check the path: prototools.py should be in the same directory as your script). Then you can freely use any of its functions (see examples).
+- to use `prototool` as a module (recommended) you should import it as any other module (check the path: `prototool.py` should be in the same directory as your script). Then you can freely use any of its functions (see examples).
 
 ***
 
 ### Options
 Arguments:
-- '*args[str]' sequences to work with. You can pass several arguments into all functions
-- 'method' - a method to use
+- `*args[str]` sequences to work with. You can pass several arguments into all functions
+- `method` - a method to use
 
 output: All functions return a dict, where keys are original sequenses, values are results after using a corresponding method.
 
@@ -41,55 +41,55 @@ output: All functions return a dict, where keys are original sequenses, values a
 ### Examples
 
 def recode allows to translate 1-letter to 3-letters polyaminoacids code
-- 'main('AlaValTyr', 'DNT', method = 'recode')'
-- 'recode('AlaValTyr', 'DNT')'
+- `main('AlaValTyr', 'DNT', method = 'recode')`
+- `recode('AlaValTyr', 'DNT')`
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/117befa5-feaa-433a-9ac9-23cffe9b024f)
 ***
 
 def local_alignmen perform a local alignment of 2 given sequences. Needs at least two sequences to be passed
-- '''main('MetAsnTrp', 'MNT', method='local_alignment')'''
-- '''local_alignmen('MetAsnTrp', 'MNT')'''
+- `main('MetAsnTrp', 'MNT', method='local_alignment')`
+- `local_alignmen('MetAsnTrp', 'MNT')`
 - Note that local_alignment function has a flag prettify (default = True) that prints out aligned sequences on each another
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/4dd36d24-a177-4419-9053-a5e2923a980c)
 ***
 
 def from_proteins_seqs_to_rna allows to decode polyaminoacid sequences in RNA sequences
-- '''main('AlaValTyr', 'DNT', method = 'from_proteins_seqs_to_rna')'''
-- '''from_proteins_seqs_to_rna('AlaValTyr', 'DNT')'''
+- `main('AlaValTyr', 'DNT', method = 'from_proteins_seqs_to_rna')`
+- `from_proteins_seqs_to_rna('AlaValTyr', 'DNT')`
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/9ee92d0d-68a4-471b-b65a-2fa6b46ab844)
 ***
 
 def isoelectric_point_determination allows to determine isoelectric point of polyaminoacid sequences 
-- '''main('AlaValTyr', 'DNT', method = 'isoelectric_point_determination')'''
-- '''isoelectric_point_determination('AlaValTyr', 'DNT')'''
+- `main('AlaValTyr', 'DNT', method = 'isoelectric_point_determination')`
+- `isoelectric_point_determination('AlaValTyr', 'DNT')`
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/24027a07-b20b-42d4-bb10-4ca7189038d4)
 ***
 
 def back_transcribe allows to decode polyaminoacid sequences in DNA sequences
-- '''main('AlaValTyr', 'DNT', method = 'back_transcribe')'''
-- '''back_transcribe('AlaValTyr', 'DNT')'''
+- `main('AlaValTyr', 'DNT', method = 'back_transcribe')`
+- `back_transcribe('AlaValTyr', 'DNT')`
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/71f07616-a37d-48da-9e63-82b81836b9d7)
 ***
 
 def count_gc_content allows to count the ratio of GC in the entire DNA sequence
-- '''main('AlaValTyr', 'DNT', method = 'count_gc_content')'''
-- '''count_gc_content('AlaValTyr', 'DNT')'''
+- `main('AlaValTyr', 'DNT', method = 'count_gc_content')`
+- `count_gc_content('AlaValTyr', 'DNT')`
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/d2705714-a3e8-4054-8998-61d922a4feb6)
 ***
 
 def count_protein_molecular_weight allows to calculate the molecular weight of the polyaminoacid
-- '''main('AlaValTyr', 'DNT', method = 'count_protein_molecular_weight')'''
-- '''count_protein_molecular_weight('AlaValTyr', 'DNT')'''
+- `main('AlaValTyr', 'DNT', method = 'count_protein_molecular_weight')`
+- `count_protein_molecular_weight('AlaValTyr', 'DNT')`
 - ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/cc1eff9a-1b39-4232-98e4-80f622101083)
 
 ***
 
 ### Troubleshooting
-If you have '''ValueError("No input defined.")''' it means, that you have an empty input. Please, enter the correct input. 
+If you get `ValueError("No input defined.")`, it means that your input is empty. Please enter a valid input.
 ***
-If you have '''ValueError(method, " is not a valid method.")''' it means, that your tool is not correct. Please, enter the right tool.
+If you get `ValueError(method, " is not a valid method.")`, it means that the method you specified is not valid. Please specify one of the available methods.
 ***
-If you have '''ValueError('Non-protein aminoacids in sequence')''' it means, that your sequences contain non-protein aminoacids. Please, check your sequences and enter the correct input. 
+If you get `ValueError('Non-protein aminoacids in sequence')`, it means that your sequences contain non-protein amino acids. Please check your sequences and enter a valid input.
 
 ***
 

From fc97f7dc0e53f8a88f03d1398b9614c3b8e63a25 Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Sun, 1 Oct 2023 12:33:14 +0300
Subject: [PATCH 29/30] Update README.md

---
 HW4_Sapozhnikov/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
index 0c7f9d2..dce6161 100644
--- a/HW4_Sapozhnikov/README.md
+++ b/HW4_Sapozhnikov/README.md
@@ -1,7 +1,7 @@
 # HW 4. Functions 2
 > *This is the repo for the fourth homework of the BI Python 2023 course*
 
-### Title
+### Prototool
 `prototool.py` is a special script for working with polyaminoacid sequences
 
 ***

From ca5cf540a197cc205f726aa34f9d6ff210f960ae Mon Sep 17 00:00:00 2001
From: Nikita <81642791+NSapozhnikov@users.noreply.github.com>
Date: Fri, 6 Oct 2023 16:48:55 +0300
Subject: [PATCH 30/30] Update HW4_Sapozhnikov/README.md

Co-authored-by: Nikita Vaulin <vaulin@ro.ru>
---
 HW4_Sapozhnikov/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md
index dce6161..c347e60 100644
--- a/HW4_Sapozhnikov/README.md
+++ b/HW4_Sapozhnikov/README.md
@@ -8,7 +8,7 @@
 
 ### Overview
 `prototool.py` includes 7 methods to treatment of polyaminoacid sequences.
-`prototool.py` can be used for the next goals:
+`prototool.py` can be used for the following purposes:
 - recoding 1-letter coded polyaminoacid seqeunces into 3-letter coded and vice versa;
 - polyaminoacid sequences aligment with Smith-Waterman algorithm [^1];
 - finding possinle RNA sequences for given polyaminoacid sequences;