From 921d35b1fed9432a43c9e218a4b15475633b44aa Mon Sep 17 00:00:00 2001
From: michtrofimov <92677906+michtrofimov@users.noreply.github.com>
Date: Tue, 26 Sep 2023 18:40:21 +0300
Subject: [PATCH 01/25] Create folder for work

---
 HW4_Trofimov/folder_file | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 HW4_Trofimov/folder_file

diff --git a/HW4_Trofimov/folder_file b/HW4_Trofimov/folder_file
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/HW4_Trofimov/folder_file
@@ -0,0 +1 @@
+

From 49a006a9de21165a7d5065fa59f0b779bc30315d Mon Sep 17 00:00:00 2001
From: michtrofimov <92677906+michtrofimov@users.noreply.github.com>
Date: Tue, 26 Sep 2023 18:41:00 +0300
Subject: [PATCH 02/25] Delete HW4_Trofimov/folder_file

---
 HW4_Trofimov/folder_file | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 HW4_Trofimov/folder_file

diff --git a/HW4_Trofimov/folder_file b/HW4_Trofimov/folder_file
deleted file mode 100644
index 8b13789..0000000
--- a/HW4_Trofimov/folder_file
+++ /dev/null
@@ -1 +0,0 @@
-

From b8a58af1b90cb7b0693407556648311c88afda37 Mon Sep 17 00:00:00 2001
From: michtrofimov <92677906+michtrofimov@users.noreply.github.com>
Date: Tue, 26 Sep 2023 18:43:00 +0300
Subject: [PATCH 03/25] Create working folder

---
 HW4_Trofimov/test_file | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 HW4_Trofimov/test_file

diff --git a/HW4_Trofimov/test_file b/HW4_Trofimov/test_file
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/HW4_Trofimov/test_file
@@ -0,0 +1 @@
+

From 53527e62d4b492bc87aebc3c37fa9cb3575540f6 Mon Sep 17 00:00:00 2001
From: Michil Trofimov <trofimov.michil@gmail.com>
Date: Tue, 26 Sep 2023 18:51:23 +0300
Subject: [PATCH 04/25] Rename main script

---
 HW4_Trofimov/{test_file => das_protein_tools.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename HW4_Trofimov/{test_file => das_protein_tools.py} (100%)

diff --git a/HW4_Trofimov/test_file b/HW4_Trofimov/das_protein_tools.py
similarity index 100%
rename from HW4_Trofimov/test_file
rename to HW4_Trofimov/das_protein_tools.py

From 2e0b8263a83359a4fd0b755cdb40089aadab2796 Mon Sep 17 00:00:00 2001
From: Michil Trofimov <trofimov.michil@gmail.com>
Date: Wed, 27 Sep 2023 16:09:07 +0300
Subject: [PATCH 05/25] Add function for isoelectric point calculation

---
 HW4_Trofimov/das_protein_tools.py | 123 ++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index 8b13789..5a340cc 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -1 +1,124 @@
+def calculate_pI(
+    sequence: str,
+    pKa_values: dict = {
+        "D": 3.86,  # Aspartic acid (COOH side chain)
+        "E": 4.25,  # Glutamic acid (COOH side chain)
+        "C": 8.33,  # Cysteine (R-SH)
+        "Y": 10.46,  # Tyrosine (phenolic OH)
+        "H": 6.0,  # Histidine (imidazole group)
+        "K": 10.67,  # Lysine (amino group)
+        "R": 12.48,  # Arginine (guanidinium group)
+        "N": 3.22,  # Asparagine (amino group)
+        "Q": 3.65,  # Glutamine (amino group)
+        "T": 2.95,  # Threonine (amino group)
+        "S": 2.19,  # Serine (hydroxyl group)
+        "W": 11.55,  # Tryptophan (imidazole group)
+        "Y": 10.46,  # Tyrosine (phenolic OH)
+    },
+):
+    aminoacid_pIs = []
+    total_charge = 0.0
 
+    # Calculate pI for each amino acid in the sequence while preserving case
+    for aa in sequence:
+        aa_upper = aa.upper()
+        if aa_upper in pKa_values:
+            pI = pKa_values[aa_upper]
+
+            if aa.isupper():
+                aminoacid_pIs.append((aa_upper, pI))
+            else:
+                aminoacid_pIs.append((aa, pI))
+            total_charge += pI
+
+    # Calculate the overall pI of the sequence
+    overall_pI = total_charge / len(sequence)
+    overall_pI = round(overall_pI, 2)
+
+    return aminoacid_pIs, overall_pI
+
+
+def build_scoring_matrix(match_score, mismatch_score):
+    amino_acids = "ACDEFGHIKLMNPQRSTVWY"  # Amino acid alphabet
+
+    # Initialize an empty scoring matrix as a dictionary of dictionaries
+    scoring_matrix = {}
+
+    for aa1 in amino_acids:
+        scoring_matrix[aa1] = {}
+        for aa2 in amino_acids:
+            scoring_matrix[aa1][aa2] = (
+                match_score if aa1.upper() == aa2.upper() else mismatch_score
+            )
+
+    return scoring_matrix
+
+
+def needleman_wunsch(
+    seq1, seq2, scoring_matrix=None, gap_penalty=-1, match_score=1, mismatch_score=-1
+):
+    if scoring_matrix is None:
+        # Default scoring matrix if not provided
+        scoring_matrix = build_scoring_matrix(match_score, mismatch_score)
+
+    seq1_upper = seq1.upper()  # Convert seq1 to uppercase
+    seq2_upper = seq2.upper()  # Convert seq2 to uppercase
+
+    m, n = len(seq1_upper), len(seq2_upper)
+
+    # Initialize matrices
+    dp = [[0] * (n + 1) for _ in range(m + 1)]
+    traceback = [[""] * (n + 1) for _ in range(m + 1)]
+
+    # Fill in the scoring matrix and traceback matrix
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            match = dp[i - 1][j - 1] + scoring_matrix.get(seq1_upper[i - 1], {}).get(
+                seq2_upper[j - 1], mismatch_score
+            )
+            delete = dp[i - 1][j] + gap_penalty
+            insert = dp[i][j - 1] + gap_penalty
+
+            dp[i][j] = max(match, delete, insert)
+
+            if dp[i][j] == match:
+                traceback[i][j] = "D"  # Diagonal (indicates a match/mismatch)
+            elif dp[i][j] == delete:
+                traceback[i][j] = "U"  # Up (indicates a gap in seq2)
+            else:
+                traceback[i][j] = "L"  # Left (indicates a gap in seq1)
+
+    # Traceback to find the aligned sequences while preserving case
+    aligned_seq1, aligned_seq2 = [], []
+    i, j = m, n
+    while i > 0 or j > 0:
+        if i > 0 and j > 0 and traceback[i][j] == "D":
+            if seq1[i - 1].isupper():
+                aligned_seq1.append(seq1_upper[i - 1])
+            else:
+                aligned_seq1.append(seq1[i - 1])
+            if seq2[j - 1].isupper():
+                aligned_seq2.append(seq2_upper[j - 1])
+            else:
+                aligned_seq2.append(seq2[j - 1])
+            i -= 1
+            j -= 1
+        elif i > 0 and traceback[i][j] == "U":
+            if seq1[i - 1].isupper():
+                aligned_seq1.append(seq1_upper[i - 1])
+            else:
+                aligned_seq1.append(seq1[i - 1])
+            aligned_seq2.append("-")
+            i -= 1
+        else:
+            aligned_seq1.append("-")
+            if seq2[j - 1].isupper():
+                aligned_seq2.append(seq2_upper[j - 1])
+            else:
+                aligned_seq2.append(seq2[j - 1])
+            j -= 1
+
+    aligned_seq1 = "".join(reversed(aligned_seq1))
+    aligned_seq2 = "".join(reversed(aligned_seq2))
+
+    return aligned_seq1, aligned_seq2, dp[m][n]

From 27a72c7c94d4f1be5754a76f43669f577c8f4a86 Mon Sep 17 00:00:00 2001
From: Michil Trofimov <trofimov.michil@gmail.com>
Date: Wed, 27 Sep 2023 16:21:23 +0300
Subject: [PATCH 06/25] Add auxiliary function build_scoring_matrix for the
 needleman-wunsch function

---
 HW4_Trofimov/das_protein_tools.py | 111 +++++++++---------------------
 1 file changed, 34 insertions(+), 77 deletions(-)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index 5a340cc..38db14b 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -15,7 +15,21 @@ def calculate_pI(
         "W": 11.55,  # Tryptophan (imidazole group)
         "Y": 10.46,  # Tyrosine (phenolic OH)
     },
-):
+) -> str:
+    """
+    Calculates isoelectric point of a whole aminoacid sequence and for each aminoacid individually
+
+    Args:
+    - sequence (str): sequence for which to calculate isoelectric point
+    - pKa_values (dict): acid dissociation constants for each aminoacid
+
+    Return:
+    - str: string, which contains:
+            - an original sequence,
+            - list of tuple pairs of aminoacid and corresponding isoelectric point,
+            - overall isoelectric point of sequence
+    """
+
     aminoacid_pIs = []
     total_charge = 0.0
 
@@ -35,90 +49,33 @@ def calculate_pI(
     overall_pI = total_charge / len(sequence)
     overall_pI = round(overall_pI, 2)
 
-    return aminoacid_pIs, overall_pI
+    return f"Sequence: {sequence}. Isoelectric point of each aminoacid: {aminoacid_pIs}, Sequence's isoelectric point: {overall_pI}"
 
 
-def build_scoring_matrix(match_score, mismatch_score):
-    amino_acids = "ACDEFGHIKLMNPQRSTVWY"  # Amino acid alphabet
+def build_scoring_matrix(
+    match_score: int,
+    mismatch_score: int,
+    amino_acid_alphabet: str = "ACDEFGHIKLMNPQRSTVWY",
+) -> dict:
+    """
+    Build a default scoring matrix, if not provided in needleman-wunsch function parameter, as a dictionary of dictionaries
+
+    Args:
+    - match_score (int): integer value of a matching score of aminoacids
+    - mismatch_score (int): integer value of a mismatching score of aminoacids
+    - amino_acid_alphabet (str): upper case amino acid alphabet
+
+    Returns:
+    - dictionary of dictionaries of aminoacids scores. In which this dictionary contain aminoacid as a key and its value a dictionary of scores
+    """
 
-    # Initialize an empty scoring matrix as a dictionary of dictionaries
     scoring_matrix = {}
 
-    for aa1 in amino_acids:
+    for aa1 in amino_acid_alphabet:
         scoring_matrix[aa1] = {}
-        for aa2 in amino_acids:
+        for aa2 in amino_acid_alphabet:
             scoring_matrix[aa1][aa2] = (
                 match_score if aa1.upper() == aa2.upper() else mismatch_score
             )
 
     return scoring_matrix
-
-
-def needleman_wunsch(
-    seq1, seq2, scoring_matrix=None, gap_penalty=-1, match_score=1, mismatch_score=-1
-):
-    if scoring_matrix is None:
-        # Default scoring matrix if not provided
-        scoring_matrix = build_scoring_matrix(match_score, mismatch_score)
-
-    seq1_upper = seq1.upper()  # Convert seq1 to uppercase
-    seq2_upper = seq2.upper()  # Convert seq2 to uppercase
-
-    m, n = len(seq1_upper), len(seq2_upper)
-
-    # Initialize matrices
-    dp = [[0] * (n + 1) for _ in range(m + 1)]
-    traceback = [[""] * (n + 1) for _ in range(m + 1)]
-
-    # Fill in the scoring matrix and traceback matrix
-    for i in range(1, m + 1):
-        for j in range(1, n + 1):
-            match = dp[i - 1][j - 1] + scoring_matrix.get(seq1_upper[i - 1], {}).get(
-                seq2_upper[j - 1], mismatch_score
-            )
-            delete = dp[i - 1][j] + gap_penalty
-            insert = dp[i][j - 1] + gap_penalty
-
-            dp[i][j] = max(match, delete, insert)
-
-            if dp[i][j] == match:
-                traceback[i][j] = "D"  # Diagonal (indicates a match/mismatch)
-            elif dp[i][j] == delete:
-                traceback[i][j] = "U"  # Up (indicates a gap in seq2)
-            else:
-                traceback[i][j] = "L"  # Left (indicates a gap in seq1)
-
-    # Traceback to find the aligned sequences while preserving case
-    aligned_seq1, aligned_seq2 = [], []
-    i, j = m, n
-    while i > 0 or j > 0:
-        if i > 0 and j > 0 and traceback[i][j] == "D":
-            if seq1[i - 1].isupper():
-                aligned_seq1.append(seq1_upper[i - 1])
-            else:
-                aligned_seq1.append(seq1[i - 1])
-            if seq2[j - 1].isupper():
-                aligned_seq2.append(seq2_upper[j - 1])
-            else:
-                aligned_seq2.append(seq2[j - 1])
-            i -= 1
-            j -= 1
-        elif i > 0 and traceback[i][j] == "U":
-            if seq1[i - 1].isupper():
-                aligned_seq1.append(seq1_upper[i - 1])
-            else:
-                aligned_seq1.append(seq1[i - 1])
-            aligned_seq2.append("-")
-            i -= 1
-        else:
-            aligned_seq1.append("-")
-            if seq2[j - 1].isupper():
-                aligned_seq2.append(seq2_upper[j - 1])
-            else:
-                aligned_seq2.append(seq2[j - 1])
-            j -= 1
-
-    aligned_seq1 = "".join(reversed(aligned_seq1))
-    aligned_seq2 = "".join(reversed(aligned_seq2))
-
-    return aligned_seq1, aligned_seq2, dp[m][n]

From 399b7f2b90233e9e59df428b5d672a2ee8a8182e Mon Sep 17 00:00:00 2001
From: Michil Trofimov <trofimov.michil@gmail.com>
Date: Wed, 27 Sep 2023 16:43:45 +0300
Subject: [PATCH 07/25] Add needleman-wunsch function

---
 HW4_Trofimov/das_protein_tools.py | 102 +++++++++++++++++++++++++++++-
 1 file changed, 100 insertions(+), 2 deletions(-)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index 38db14b..0e4e948 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -58,7 +58,7 @@ def build_scoring_matrix(
     amino_acid_alphabet: str = "ACDEFGHIKLMNPQRSTVWY",
 ) -> dict:
     """
-    Build a default scoring matrix, if not provided in needleman-wunsch function parameter, as a dictionary of dictionaries
+    Build a default scoring matrix, if not provided in needleman-wunsch function parameter
 
     Args:
     - match_score (int): integer value of a matching score of aminoacids
@@ -66,7 +66,7 @@ def build_scoring_matrix(
     - amino_acid_alphabet (str): upper case amino acid alphabet
 
     Returns:
-    - dictionary of dictionaries of aminoacids scores. In which this dictionary contain aminoacid as a key and its value a dictionary of scores
+    - a dictionary of dictionaries representing a scoring matrix for aminoacids paris. Key of a dictionary is an aminoacid and its value is a dictionary of scores
     """
 
     scoring_matrix = {}
@@ -79,3 +79,101 @@ def build_scoring_matrix(
             )
 
     return scoring_matrix
+
+
+def needleman_wunsch(
+    seq1: str,
+    seq2: str,
+    scoring_matrix: dict = None,
+    gap_penalty: int = -1,
+    match_score: int = 1,
+    mismatch_score: int = -1,
+) -> str:
+    """
+    Uses Needleman-Wunsch algorithm to make a global alignment of two sequences.
+
+    Args:
+    - seq1 (str): first aminoacid sequence for alignment
+    - seq2 (str): second aminoacid sequence for alignment
+    - scoring_matrix (dict): A dictionary representing a scoring matrix for amino acid pairs
+      If not provided, a default scoring_matrix is generated based on match and mismatch scores
+    - gap_penalty (int): integer va;ue of a penalty score for introducing a gap in the alignment
+    - match_score (int): integer value of a matching score for matching aminoacids
+    - mismatch_score (int): integer value of a mismatching score for mismatched aminoacids
+
+    Returns:
+    - string: a string containing the aligned sequences (str), the aligned score (int)
+    """
+    if scoring_matrix is None:
+        # Default scoring matrix if not provided
+        scoring_matrix = build_scoring_matrix(match_score, mismatch_score)
+
+    seq1_upper = seq1.upper()  # Convert seq1 to uppercase
+    seq2_upper = seq2.upper()  # Convert seq2 to uppercase
+
+    m, n = len(seq1_upper), len(seq2_upper)
+
+    # Initialize matrices
+    dp = [[0] * (n + 1) for _ in range(m + 1)]
+    traceback = [[""] * (n + 1) for _ in range(m + 1)]
+
+    # Fill in the scoring matrix and traceback matrix
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            match = dp[i - 1][j - 1] + scoring_matrix.get(seq1_upper[i - 1], {}).get(
+                seq2_upper[j - 1], mismatch_score
+            )
+            delete = dp[i - 1][j] + gap_penalty
+            insert = dp[i][j - 1] + gap_penalty
+
+            dp[i][j] = max(match, delete, insert)
+
+            if dp[i][j] == match:
+                traceback[i][j] = "D"  # Diagonal (indicates a match/mismatch)
+            elif dp[i][j] == delete:
+                traceback[i][j] = "U"  # Up (indicates a gap in seq2)
+            else:
+                traceback[i][j] = "L"  # Left (indicates a gap in seq1)
+
+    # Traceback to find the aligned sequences while preserving case
+    aligned_seq1, aligned_seq2 = [], []
+    i, j = m, n
+    while i > 0 or j > 0:
+        if i > 0 and j > 0 and traceback[i][j] == "D":
+            # check original case of amionacid in seq1
+            if seq1[i - 1].isupper():
+                aligned_seq1.append(seq1_upper[i - 1])
+            else:
+                aligned_seq1.append(seq1[i - 1])
+
+            # check original case of amionacid in seq2
+            if seq2[j - 1].isupper():
+                aligned_seq2.append(seq2_upper[j - 1])
+            else:
+                aligned_seq2.append(seq2[j - 1])
+
+            i -= 1
+            j -= 1
+        elif i > 0 and traceback[i][j] == "U":
+            # check original case of amionacid in seq1
+            if seq1[i - 1].isupper():
+                aligned_seq1.append(seq1_upper[i - 1])
+            else:
+                aligned_seq1.append(seq1[i - 1])
+            aligned_seq2.append("-")
+
+            i -= 1
+        else:
+            aligned_seq1.append("-")
+            # check original case of amionacid in seq2
+            if seq2[j - 1].isupper():
+                aligned_seq2.append(seq2_upper[j - 1])
+            else:
+                aligned_seq2.append(seq2[j - 1])
+
+            j -= 1
+
+    aligned_seq1 = "".join(reversed(aligned_seq1))
+    aligned_seq2 = "".join(reversed(aligned_seq2))
+
+    return f"{aligned_seq1}, {aligned_seq2}, final score: {dp[m][n]}"

From 40af0025bef15eed7521af113b1fc35036ad5328 Mon Sep 17 00:00:00 2001
From: michtrofimov <92677906+michtrofimov@users.noreply.github.com>
Date: Wed, 27 Sep 2023 17:03:09 +0300
Subject: [PATCH 08/25] Create README.md

---
 HW4_Trofimov/README.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 HW4_Trofimov/README.md

diff --git a/HW4_Trofimov/README.md b/HW4_Trofimov/README.md
new file mode 100644
index 0000000..975059e
--- /dev/null
+++ b/HW4_Trofimov/README.md
@@ -0,0 +1,14 @@
+<img width="641" alt="Screenshot 2023-09-27 at 17 01 47" src="https://github.com/michtrofimov/HW4_Functions2/assets/92677906/d5c63a17-7f6d-43c7-b88e-a2e994877abb">
+
+# Das biotools.aminoacids
+> **The great and terrifying successor of biopython**
+
+Das biotools strikes again! Now it works only with aminoacid sequences! 
+
+## Features
+
+- **calculate_pI**: Calculate the isoelectric point of a given amino acid sequence, both individually for each amino acid and for the entire sequence.
+
+- **build_scoring_matrix**: Build a scoring matrix for amino acid pairs, which can be used in sequence alignment algorithms.
+
+- **needleman_wunsch**: Implement the Needleman-Wunsch algorithm for global sequence alignment of two amino acid sequences.

From 8612f1726773b61c033667d870ec394ea54acce9 Mon Sep 17 00:00:00 2001
From: Alisa Fedorenko <afedorenko00@gmail.com>
Date: Thu, 28 Sep 2023 16:58:15 +0300
Subject: [PATCH 09/25] Add function calculating aminoacids frequencies

---
 HW4_Trofimov/das_protein_tools.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index 0e4e948..b92795e 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -177,3 +177,27 @@ def needleman_wunsch(
     aligned_seq2 = "".join(reversed(aligned_seq2))
 
     return f"{aligned_seq1}, {aligned_seq2}, final score: {dp[m][n]}"
+
+def calculate_aa_freq(seq: str) -> dict:
+    """
+    Calculates the frequency of each amino acid in a protein sequence or sequences.
+
+    :param sequences: protein sequence or sequences
+    :type sequences: str or list of str
+    :return: dictionary with the frequency of each amino acid
+    :rtype: dict
+    """
+    sequences = ''
+
+    # Creating a dictionary with aminoacid frequencies:
+    amino_acid_frequency = {}
+
+    for amino_acid in sequences:
+        # If the aminoacid has been already in:
+        if amino_acid in amino_acid_frequency:
+            amino_acid_frequency[amino_acid] += 1
+        # If the aminoacid hasn't been already in:
+        else:
+            amino_acid_frequency[amino_acid] = 1
+
+    return amino_acid_frequency

From 264e6cf8aa3ffc8adcf039ab84ec5200655e0680 Mon Sep 17 00:00:00 2001
From: ShakirSuleimanov <suleymanov-ef@mail.ru>
Date: Thu, 28 Sep 2023 17:48:18 +0300
Subject: [PATCH 10/25] Add file with dictionaries

---
 HW4_Trofimov/protein_dict.py | 73 ++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 HW4_Trofimov/protein_dict.py

diff --git a/HW4_Trofimov/protein_dict.py b/HW4_Trofimov/protein_dict.py
new file mode 100644
index 0000000..b95c713
--- /dev/null
+++ b/HW4_Trofimov/protein_dict.py
@@ -0,0 +1,73 @@
+#Dictionary: keys - single-letter amino acid designations; values - list of RNA codons
+aa_codon_dict = {
+    'G': ['GGA', 'GGU', 'GGC', 'GGG'],
+    'R': ['AGA', 'AGG', 'CGA', 'CGC', 'CGG', 'CGU'],
+    'S': ['AGC', 'AGU', 'UCA', 'UCC', 'UCG', 'UCU'],
+    'E': ['GAA', 'GAG'],
+    'P': ['CCA', 'CCC', 'CCG', 'CCU'],
+    'L': ['CUA', 'CUC', 'CUG', 'CUU', 'UUA', 'UUG'],
+    'V': ['GUA', 'GUC', 'GUG', 'GUU'],
+    'T': ['ACA', 'ACC', 'ACG', 'ACU'],
+    'A': ['GCA', 'GCC', 'GCG', 'GCU'],
+    'I': ['AUA', 'AUC', 'AUU'],
+    'F': ['UUC', 'UUU'],
+    'H': ['CAC', 'CAU'],
+    'Y': ['UAC', 'UAU'],
+    'Q': ['CAA', 'CAG'],
+    'C': ['UGC', 'UGU'],
+    'N': ['AAC', 'AAU'],
+    'D': ['GAC', 'GAU'],
+    'K': ['AAA', 'AAG'],
+    'M': ['AUG'],
+    'W': ['UGG'],
+}
+
+
+#Dictionary: keys - single-letter amino acid designations; values - names of amino acids
+aa_one_to_three_letter = {
+    'A' : 'Ala-', 
+    'C' : 'Cys-', 
+    'D' : 'Asp-', 
+    'E' : 'Glu-',
+    'F' : 'Phe-', 
+    'G' : 'Gly-',
+    'H' : 'His-', 
+    'I' : 'Ile-',
+    'K' : 'Lys-', 
+    'L' : 'Leu-',
+    'M' : 'Met-', 
+    'N' : 'Asn-',
+    'P' : 'Pro-', 
+    'Q' : 'Gln-', 
+    'R' : 'Arg-', 
+    'S' : 'Ser-',
+    'T' : 'Thr-', 
+    'V' : 'Val-', 
+    'W' : 'Trp-', 
+    'Y' : 'Tyr-',
+}
+
+
+# aminoacids mass dictionary
+aa_monoistopic_mass_dict = {
+    'A' : 71.03711, 
+    'C' : 103.00919, 
+    'D' : 115.02694, 
+    'E' : 129.04259,
+    'F' : 147.06841, 
+    'G' : 57.02146,
+    'H' : 137.05891, 
+    'I' : 113.08406,
+    'K' : 128.09496, 
+    'L' : 113.08406,
+    'M' : 131.04049, 
+    'N' : 114.04293,
+    'P' : 97.05276, 
+    'Q' : 128.05858, 
+    'R' : 156.10111, 
+    'S' : 87.03203,
+    'T' : 101.04768, 
+    'V' : 99.06841, 
+    'W' : 186.07931, 
+    'Y' : 163.06333,
+}

From 428a23145de85683b84246713897550a5651b7fb Mon Sep 17 00:00:00 2001
From: ShakirSuleimanov <suleymanov-ef@mail.ru>
Date: Thu, 28 Sep 2023 17:50:08 +0300
Subject: [PATCH 11/25] Add modules import and is_protein function

---
 HW4_Trofimov/das_protein_tools.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index b92795e..53a995b 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -1,3 +1,24 @@
+# importing necessary modules 
+import protein_dict as pd
+from random import choice
+
+
+# Function to determine is the sequence is a protein or not
+def is_protein(seq: str) -> bool:
+    """
+    This function checks if the sequence is a protein or not
+
+    Arguments:
+        seq (str): A sequence of aminoacids
+
+    Output:
+        returns True or False 
+    """
+    unique_chars = set(seq)
+    aminoacids = set(pd.aa_monoistopic_mass_dict.keys())
+    return bool(unique_chars <= aminoacids)
+
+
 def calculate_pI(
     sequence: str,
     pKa_values: dict = {

From 7954e4afaa8938b3656ccfc6752c9241bf5527d9 Mon Sep 17 00:00:00 2001
From: ShakirSuleimanov <suleymanov-ef@mail.ru>
Date: Thu, 28 Sep 2023 17:53:25 +0300
Subject: [PATCH 12/25] Add comments to previously written functions

---
 HW4_Trofimov/das_protein_tools.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index 53a995b..31fe1d8 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -19,6 +19,7 @@ def is_protein(seq: str) -> bool:
     return bool(unique_chars <= aminoacids)
 
 
+# Function to calculate pI
 def calculate_pI(
     sequence: str,
     pKa_values: dict = {
@@ -73,6 +74,7 @@ def calculate_pI(
     return f"Sequence: {sequence}. Isoelectric point of each aminoacid: {aminoacid_pIs}, Sequence's isoelectric point: {overall_pI}"
 
 
+#Function to build scoring matrix for needleman_wunsch function
 def build_scoring_matrix(
     match_score: int,
     mismatch_score: int,
@@ -102,6 +104,7 @@ def build_scoring_matrix(
     return scoring_matrix
 
 
+# Function to perform alignment based on needleman_wunsch algorithm
 def needleman_wunsch(
     seq1: str,
     seq2: str,
@@ -199,6 +202,8 @@ def needleman_wunsch(
 
     return f"{aligned_seq1}, {aligned_seq2}, final score: {dp[m][n]}"
 
+
+# Function to calculate frequency of unique aminoacid in the sequence
 def calculate_aa_freq(seq: str) -> dict:
     """
     Calculates the frequency of each amino acid in a protein sequence or sequences.
@@ -222,3 +227,5 @@ def calculate_aa_freq(seq: str) -> dict:
             amino_acid_frequency[amino_acid] = 1
 
     return amino_acid_frequency
+
+

From 93feb9f2a026cec29b5fe493a6c2b35b5cd98f56 Mon Sep 17 00:00:00 2001
From: ShakirSuleimanov <suleymanov-ef@mail.ru>
Date: Thu, 28 Sep 2023 18:08:10 +0300
Subject: [PATCH 13/25] Add function to convert one-letter aminoacid sequence
 to three-letter

---
 HW4_Trofimov/das_protein_tools.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index 31fe1d8..4c4e4e8 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -229,3 +229,22 @@ def calculate_aa_freq(seq: str) -> dict:
     return amino_acid_frequency
 
 
+# Convert one-letter protein sequence to three-letter protein sequence
+def convert_to_3L_code(seq: str) -> str:
+    """
+    This function takes one letter aminoacids sequence and convert's it to three leter coding
+
+    Arguments:
+        seq (str): A sequence of aminoacids
+
+    Output:
+        same sequence but in three-letter coding
+    """
+    seq = seq.upper()
+    if is_protein(seq) is True:
+        sequence = ''.join(pd.aa_one_to_three_letter.get(aa) for aa in seq)
+        return sequence[:-1]
+    else:
+        raise ValueError("Sequence is not a protein, input should be protein")
+
+

From 8d1dbb86bf1e414ee36b149691822161c7faf2ad Mon Sep 17 00:00:00 2001
From: ShakirSuleimanov <suleymanov-ef@mail.ru>
Date: Thu, 28 Sep 2023 18:19:28 +0300
Subject: [PATCH 14/25] Add function to calculate protein mass

---
 HW4_Trofimov/das_protein_tools.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index 4c4e4e8..e1fcd4e 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -248,3 +248,22 @@ def convert_to_3L_code(seq: str) -> str:
         raise ValueError("Sequence is not a protein, input should be protein")
 
 
+# Function to calculate protein mass
+def protein_mass (seq: str) -> float:
+    """
+    This function takes aminoacids sequence and counts it's summary molecular weight using monoisotopic masses
+
+    Arguments:
+        seq (str): A sequence of aminoacids
+
+    Output:
+        returns molecular weight 
+    """
+    seq = seq.upper()
+    if is_protein(seq) is True:
+        mass = sum(pd.aa_monoistopic_mass_dict.get(aa) for aa in seq)
+        return mass
+    else:
+        raise ValueError("Sequence is not a protein, input should be protein")
+    
+

From c24525987d28fd293376cf0feb5dff5392d53278 Mon Sep 17 00:00:00 2001
From: ShakirSuleimanov <suleymanov-ef@mail.ru>
Date: Thu, 28 Sep 2023 18:34:17 +0300
Subject: [PATCH 15/25] Add function to translate protein sequence to RNA

---
 HW4_Trofimov/das_protein_tools.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index e1fcd4e..a5183d6 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -229,7 +229,7 @@ def calculate_aa_freq(seq: str) -> dict:
     return amino_acid_frequency
 
 
-# Convert one-letter protein sequence to three-letter protein sequence
+# Function to convert one-letter protein sequence to three-letter protein sequence
 def convert_to_3L_code(seq: str) -> str:
     """
     This function takes one letter aminoacids sequence and convert's it to three leter coding
@@ -267,3 +267,25 @@ def protein_mass (seq: str) -> float:
         raise ValueError("Sequence is not a protein, input should be protein")
     
 
+# Function to translate Protein to RNA 
+def translate_protein_rna(seq: str) -> str:
+    """
+    This function takes  aminoacid sequence and translates in to the RNA. 
+    As most of the aminoacids are coded with several different codons, 
+    this function will take a random codon of the set for such aminoacids.
+
+    Arguments:
+        seq (str): A sequence of RNA molecule
+
+    Output:
+        returns sequence of aminoacids 
+    """
+    seq = seq.upper()
+    if is_protein(seq) is True:
+        rna = ''
+        for aa in seq:
+            codon = choice(pd.aa_codon_dict.get(aa))
+            rna += codon
+        return rna
+    else:
+        raise ValueError("Sequence is not a protein, input should be a protein")

From e6b19dd3c687d09e0362997386ed443b53f9662c Mon Sep 17 00:00:00 2001
From: ShakirSuleimanov <suleymanov-ef@mail.ru>
Date: Thu, 28 Sep 2023 23:01:39 +0300
Subject: [PATCH 16/25] Add description for my functions

---
 HW4_Trofimov/README.md | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/HW4_Trofimov/README.md b/HW4_Trofimov/README.md
index 975059e..b201973 100644
--- a/HW4_Trofimov/README.md
+++ b/HW4_Trofimov/README.md
@@ -7,8 +7,26 @@ Das biotools strikes again! Now it works only with aminoacid sequences!
 
 ## Features
 
-- **calculate_pI**: Calculate the isoelectric point of a given amino acid sequence, both individually for each amino acid and for the entire sequence.
+- **calculate_pI()**: Calculate the isoelectric point of a given amino acid sequence, both individually for each amino acid and for the entire sequence.
 
-- **build_scoring_matrix**: Build a scoring matrix for amino acid pairs, which can be used in sequence alignment algorithms.
+- **build_scoring_matri())**: Build a scoring matrix for amino acid pairs, which can be used in sequence alignment algorithms.
 
-- **needleman_wunsch**: Implement the Needleman-Wunsch algorithm for global sequence alignment of two amino acid sequences.
+- **needleman_wunsch()**: Implement the Needleman-Wunsch algorithm for global sequence alignment of two amino acid sequences.
+
+- **convert_to_3L_code()**: Converts one letter animoacid sequence to three letter aminoacid sequence.
+
+```python 
+convert_to_3L_code('ACDEF') -> 'Ala-Cys-Asp-Glu-Phe'
+```
+
+- **protein_mass()**: Calculates molecular weight of the aminoacid sequence using monoisotopic masses.
+
+```python 
+protein_mass('ACDEF') -> 565.184
+```
+
+- **translate_protein_rna()**: Converts aminoacid sequence to RNA sequence. For those aminoacids that are coded with more than one codon, this function randomly chooses one codon from the set.
+
+```python 
+translate_protein_rna('ACDEF') -> 'GCCUGCGACGAGUUC'
+```
\ No newline at end of file

From 8b5985f6362f6d5bac3a1db428bc412535376fd4 Mon Sep 17 00:00:00 2001
From: Alisa Fedorenko <afedorenko00@gmail.com>
Date: Fri, 29 Sep 2023 23:29:47 +0300
Subject: [PATCH 17/25] Add main function

---
 HW4_Trofimov/das_protein_tools.py | 46 ++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index a5183d6..d179c94 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -3,6 +3,10 @@
 from random import choice
 
 
+AMINO_LETTERS = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
+                 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
+
+
 # Function to determine is the sequence is a protein or not
 def is_protein(seq: str) -> bool:
     """
@@ -12,7 +16,7 @@ def is_protein(seq: str) -> bool:
         seq (str): A sequence of aminoacids
 
     Output:
-        returns True or False 
+        returns True or False
     """
     unique_chars = set(seq)
     aminoacids = set(pd.aa_monoistopic_mass_dict.keys())
@@ -257,7 +261,7 @@ def protein_mass (seq: str) -> float:
         seq (str): A sequence of aminoacids
 
     Output:
-        returns molecular weight 
+        returns molecular weight
     """
     seq = seq.upper()
     if is_protein(seq) is True:
@@ -265,20 +269,20 @@ def protein_mass (seq: str) -> float:
         return mass
     else:
         raise ValueError("Sequence is not a protein, input should be protein")
-    
 
-# Function to translate Protein to RNA 
+
+# Function to translate Protein to RNA
 def translate_protein_rna(seq: str) -> str:
     """
-    This function takes  aminoacid sequence and translates in to the RNA. 
-    As most of the aminoacids are coded with several different codons, 
+    This function takes  aminoacid sequence and translates in to the RNA.
+    As most of the aminoacids are coded with several different codons,
     this function will take a random codon of the set for such aminoacids.
 
     Arguments:
         seq (str): A sequence of RNA molecule
 
     Output:
-        returns sequence of aminoacids 
+        returns sequence of aminoacids
     """
     seq = seq.upper()
     if is_protein(seq) is True:
@@ -289,3 +293,31 @@ def translate_protein_rna(seq: str) -> str:
         return rna
     else:
         raise ValueError("Sequence is not a protein, input should be a protein")
+
+
+def main(*args):
+    action = args[-1]
+    action_list = {
+        "calculate_pI": calculate_pI,
+        "build_scoring_matrix": build_scoring_matrix,
+        "needleman_wunsch": needleman_wunsch,
+        "calculate_aa_freq": calculate_aa_freq,
+        "translate_protein_rna": translate_protein_rna,
+        "convert_to_3L_code": convert_to_3L_code,
+        "protein_mass": protein_mass
+    }
+
+    if action not in action_list:
+        raise ValueError(f"No such action: {action}")
+
+    if not (action == "needleman_wunsch" and len(args) == 3 or
+            action != "needleman_wunsch" and len(args) == 2):
+        raise ValueError("Error in number of sequences")
+
+    for sequence in args[:-1]:
+        if not all([letter.capitalize() in AMINO_LETTERS for letter in sequence]):
+            raise ValueError(f"The sequence is not protein sequence: {sequence}")
+
+    result = action_list[action](*args[:-1])
+
+    return result

From 15f5db5117649674a3c3f2c588498800c0d76cab Mon Sep 17 00:00:00 2001
From: Alisa Fedorenko <afedorenko00@gmail.com>
Date: Sat, 30 Sep 2023 07:09:47 +0300
Subject: [PATCH 18/25] Add description and usage example of calculate_aa_freq
 function

---
 HW4_Trofimov/README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/HW4_Trofimov/README.md b/HW4_Trofimov/README.md
index d80ff30..ca1bea3 100644
--- a/HW4_Trofimov/README.md
+++ b/HW4_Trofimov/README.md
@@ -19,6 +19,8 @@ Das biotools strikes again! Now it works only with aminoacid sequences!
 
 - **translate_protein_rna()**: Converts aminoacid sequence to RNA sequence. For those aminoacids that are coded with more than one codon, this function randomly chooses one codon from the set.
 
+- **calculate_aa_freq()**: Calculate the frequencies of aminoacids in protein sequences.
+
 ## Examples
 
 - **get_pI**
@@ -50,3 +52,9 @@ protein_mass('ACDEF') -> 565.184
 ```python 
 translate_protein_rna('ACDEF') -> 'GCCUGCGACGAGUUC'
 ```
+
+- **calculate_aa_freq**
+
+```python
+calculate_aa_freq('ACDEF') -> {'A': 1, 'C': 1, 'D': 1, 'E': 1, 'F': 1}
+```

From b45a72d41d525ac2983ba158639d7115b850d658 Mon Sep 17 00:00:00 2001
From: Michil Trofimov <trofimov.michil@gmail.com>
Date: Sat, 30 Sep 2023 15:39:32 +0300
Subject: [PATCH 19/25] Redo function for isoelectric point calc

---
 HW4_Trofimov/das_protein_tools.py |  73 +++++++--------
 HW4_Trofimov/protein_dict.py      | 148 +++++++++++++++++-------------
 2 files changed, 118 insertions(+), 103 deletions(-)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index d179c94..cb90b5b 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -1,4 +1,4 @@
-# importing necessary modules 
+# importing necessary modules
 import protein_dict as pd
 from random import choice
 
@@ -23,66 +23,53 @@ def is_protein(seq: str) -> bool:
     return bool(unique_chars <= aminoacids)
 
 
-# Function to calculate pI
-def calculate_pI(
+# Function to get pI for each aa
+def get_pI(
     sequence: str,
-    pKa_values: dict = {
-        "D": 3.86,  # Aspartic acid (COOH side chain)
-        "E": 4.25,  # Glutamic acid (COOH side chain)
-        "C": 8.33,  # Cysteine (R-SH)
-        "Y": 10.46,  # Tyrosine (phenolic OH)
-        "H": 6.0,  # Histidine (imidazole group)
-        "K": 10.67,  # Lysine (amino group)
-        "R": 12.48,  # Arginine (guanidinium group)
-        "N": 3.22,  # Asparagine (amino group)
-        "Q": 3.65,  # Glutamine (amino group)
-        "T": 2.95,  # Threonine (amino group)
-        "S": 2.19,  # Serine (hydroxyl group)
-        "W": 11.55,  # Tryptophan (imidazole group)
-        "Y": 10.46,  # Tyrosine (phenolic OH)
-    },
+    pI_values: dict = None,
 ) -> str:
     """
-    Calculates isoelectric point of a whole aminoacid sequence and for each aminoacid individually
+    Gives isoelectric point value for each aminoacid individually
 
     Args:
     - sequence (str): sequence for which to calculate isoelectric point
-    - pKa_values (dict): acid dissociation constants for each aminoacid
+    - pI_values (dict): isoelectric point values for each aminoacid
 
     Return:
     - str: string, which contains:
             - an original sequence,
             - list of tuple pairs of aminoacid and corresponding isoelectric point,
-            - overall isoelectric point of sequence
     """
 
+    if pI_values is None:
+        # Default pI values if not provided
+        pI_values = pd.aa_pI
+
     aminoacid_pIs = []
-    total_charge = 0.0
 
     # Calculate pI for each amino acid in the sequence while preserving case
+    analysed_aa = []
     for aa in sequence:
         aa_upper = aa.upper()
-        if aa_upper in pKa_values:
-            pI = pKa_values[aa_upper]
-
-            if aa.isupper():
-                aminoacid_pIs.append((aa_upper, pI))
-            else:
-                aminoacid_pIs.append((aa, pI))
-            total_charge += pI
-
-    # Calculate the overall pI of the sequence
-    overall_pI = total_charge / len(sequence)
-    overall_pI = round(overall_pI, 2)
+        if aa_upper not in analysed_aa:
+            if aa_upper in pI_values:
+                pI = pI_values[aa_upper]
+                analysed_aa.append(aa_upper)
+                if aa.isupper():
+                    aminoacid_pIs.append((aa_upper, pI))
+                else:
+                    aminoacid_pIs.append((aa, pI))
+        else:
+            continue
 
-    return f"Sequence: {sequence}. Isoelectric point of each aminoacid: {aminoacid_pIs}, Sequence's isoelectric point: {overall_pI}"
+    return f"Sequence: {sequence}. Isoelectric point of each aminoacid: {aminoacid_pIs}"
 
 
-#Function to build scoring matrix for needleman_wunsch function
+# Function to build scoring matrix for needleman_wunsch function
 def build_scoring_matrix(
     match_score: int,
     mismatch_score: int,
-    amino_acid_alphabet: str = "ACDEFGHIKLMNPQRSTVWY",
+    amino_acid_alphabet: str = None,
 ) -> dict:
     """
     Build a default scoring matrix, if not provided in needleman-wunsch function parameter
@@ -96,6 +83,10 @@ def build_scoring_matrix(
     - a dictionary of dictionaries representing a scoring matrix for aminoacids paris. Key of a dictionary is an aminoacid and its value is a dictionary of scores
     """
 
+    if amino_acid_alphabet is None:
+        # Default amino acid alphabet if not provided
+        amino_acid_alphabet = "ACDEFGHIKLMNPQRSTVWY"
+
     scoring_matrix = {}
 
     for aa1 in amino_acid_alphabet:
@@ -217,7 +208,7 @@ def calculate_aa_freq(seq: str) -> dict:
     :return: dictionary with the frequency of each amino acid
     :rtype: dict
     """
-    sequences = ''
+    sequences = ""
 
     # Creating a dictionary with aminoacid frequencies:
     amino_acid_frequency = {}
@@ -246,14 +237,14 @@ def convert_to_3L_code(seq: str) -> str:
     """
     seq = seq.upper()
     if is_protein(seq) is True:
-        sequence = ''.join(pd.aa_one_to_three_letter.get(aa) for aa in seq)
+        sequence = "".join(pd.aa_one_to_three_letter.get(aa) for aa in seq)
         return sequence[:-1]
     else:
         raise ValueError("Sequence is not a protein, input should be protein")
 
 
 # Function to calculate protein mass
-def protein_mass (seq: str) -> float:
+def protein_mass(seq: str) -> float:
     """
     This function takes aminoacids sequence and counts it's summary molecular weight using monoisotopic masses
 
@@ -286,7 +277,7 @@ def translate_protein_rna(seq: str) -> str:
     """
     seq = seq.upper()
     if is_protein(seq) is True:
-        rna = ''
+        rna = ""
         for aa in seq:
             codon = choice(pd.aa_codon_dict.get(aa))
             rna += codon
diff --git a/HW4_Trofimov/protein_dict.py b/HW4_Trofimov/protein_dict.py
index b95c713..a7e05c2 100644
--- a/HW4_Trofimov/protein_dict.py
+++ b/HW4_Trofimov/protein_dict.py
@@ -1,73 +1,97 @@
-#Dictionary: keys - single-letter amino acid designations; values - list of RNA codons
+# Dictionary: keys - single-letter amino acid designations; values - list of RNA codons
 aa_codon_dict = {
-    'G': ['GGA', 'GGU', 'GGC', 'GGG'],
-    'R': ['AGA', 'AGG', 'CGA', 'CGC', 'CGG', 'CGU'],
-    'S': ['AGC', 'AGU', 'UCA', 'UCC', 'UCG', 'UCU'],
-    'E': ['GAA', 'GAG'],
-    'P': ['CCA', 'CCC', 'CCG', 'CCU'],
-    'L': ['CUA', 'CUC', 'CUG', 'CUU', 'UUA', 'UUG'],
-    'V': ['GUA', 'GUC', 'GUG', 'GUU'],
-    'T': ['ACA', 'ACC', 'ACG', 'ACU'],
-    'A': ['GCA', 'GCC', 'GCG', 'GCU'],
-    'I': ['AUA', 'AUC', 'AUU'],
-    'F': ['UUC', 'UUU'],
-    'H': ['CAC', 'CAU'],
-    'Y': ['UAC', 'UAU'],
-    'Q': ['CAA', 'CAG'],
-    'C': ['UGC', 'UGU'],
-    'N': ['AAC', 'AAU'],
-    'D': ['GAC', 'GAU'],
-    'K': ['AAA', 'AAG'],
-    'M': ['AUG'],
-    'W': ['UGG'],
+    "G": ["GGA", "GGU", "GGC", "GGG"],
+    "R": ["AGA", "AGG", "CGA", "CGC", "CGG", "CGU"],
+    "S": ["AGC", "AGU", "UCA", "UCC", "UCG", "UCU"],
+    "E": ["GAA", "GAG"],
+    "P": ["CCA", "CCC", "CCG", "CCU"],
+    "L": ["CUA", "CUC", "CUG", "CUU", "UUA", "UUG"],
+    "V": ["GUA", "GUC", "GUG", "GUU"],
+    "T": ["ACA", "ACC", "ACG", "ACU"],
+    "A": ["GCA", "GCC", "GCG", "GCU"],
+    "I": ["AUA", "AUC", "AUU"],
+    "F": ["UUC", "UUU"],
+    "H": ["CAC", "CAU"],
+    "Y": ["UAC", "UAU"],
+    "Q": ["CAA", "CAG"],
+    "C": ["UGC", "UGU"],
+    "N": ["AAC", "AAU"],
+    "D": ["GAC", "GAU"],
+    "K": ["AAA", "AAG"],
+    "M": ["AUG"],
+    "W": ["UGG"],
 }
 
 
-#Dictionary: keys - single-letter amino acid designations; values - names of amino acids
+# Dictionary: keys - single-letter amino acid designations; values - names of amino acids
 aa_one_to_three_letter = {
-    'A' : 'Ala-', 
-    'C' : 'Cys-', 
-    'D' : 'Asp-', 
-    'E' : 'Glu-',
-    'F' : 'Phe-', 
-    'G' : 'Gly-',
-    'H' : 'His-', 
-    'I' : 'Ile-',
-    'K' : 'Lys-', 
-    'L' : 'Leu-',
-    'M' : 'Met-', 
-    'N' : 'Asn-',
-    'P' : 'Pro-', 
-    'Q' : 'Gln-', 
-    'R' : 'Arg-', 
-    'S' : 'Ser-',
-    'T' : 'Thr-', 
-    'V' : 'Val-', 
-    'W' : 'Trp-', 
-    'Y' : 'Tyr-',
+    "A": "Ala-",
+    "C": "Cys-",
+    "D": "Asp-",
+    "E": "Glu-",
+    "F": "Phe-",
+    "G": "Gly-",
+    "H": "His-",
+    "I": "Ile-",
+    "K": "Lys-",
+    "L": "Leu-",
+    "M": "Met-",
+    "N": "Asn-",
+    "P": "Pro-",
+    "Q": "Gln-",
+    "R": "Arg-",
+    "S": "Ser-",
+    "T": "Thr-",
+    "V": "Val-",
+    "W": "Trp-",
+    "Y": "Tyr-",
 }
 
 
 # aminoacids mass dictionary
 aa_monoistopic_mass_dict = {
-    'A' : 71.03711, 
-    'C' : 103.00919, 
-    'D' : 115.02694, 
-    'E' : 129.04259,
-    'F' : 147.06841, 
-    'G' : 57.02146,
-    'H' : 137.05891, 
-    'I' : 113.08406,
-    'K' : 128.09496, 
-    'L' : 113.08406,
-    'M' : 131.04049, 
-    'N' : 114.04293,
-    'P' : 97.05276, 
-    'Q' : 128.05858, 
-    'R' : 156.10111, 
-    'S' : 87.03203,
-    'T' : 101.04768, 
-    'V' : 99.06841, 
-    'W' : 186.07931, 
-    'Y' : 163.06333,
+    "A": 71.03711,
+    "C": 103.00919,
+    "D": 115.02694,
+    "E": 129.04259,
+    "F": 147.06841,
+    "G": 57.02146,
+    "H": 137.05891,
+    "I": 113.08406,
+    "K": 128.09496,
+    "L": 113.08406,
+    "M": 131.04049,
+    "N": 114.04293,
+    "P": 97.05276,
+    "Q": 128.05858,
+    "R": 156.10111,
+    "S": 87.03203,
+    "T": 101.04768,
+    "V": 99.06841,
+    "W": 186.07931,
+    "Y": 163.06333,
+}
+
+# aminoacids pI (isoelectric point) values dictionary
+aa_pI = {
+    "A": 6.0,  # Alanine
+    "R": 10.8,  # Arginine
+    "N": 5.4,  # Asparagine
+    "D": 2.8,  # Aspartic Acid
+    "C": 5.0,  # Cysteine
+    "E": 3.2,  # Glutamic Acid
+    "Q": 5.7,  # Glutamine
+    "G": 6.1,  # Glycine
+    "H": 7.6,  # Histidine
+    "I": 6.0,  # Isoleucine
+    "L": 6.0,  # Leucine
+    "K": 9.7,  # Lysine
+    "M": 5.7,  # Methionine
+    "F": 5.5,  # Phenylalanine
+    "P": 6.3,  # Proline
+    "S": 5.7,  # Serine
+    "T": 5.6,  # Threonine
+    "W": 5.9,  # Tryptophan
+    "Y": 5.7,  # Tyrosine
+    "V": 6.0,  # Valine
 }

From 1a4a8e5cfa378182f6f544029c9079ecdb88b693 Mon Sep 17 00:00:00 2001
From: Michil Trofimov <trofimov.michil@gmail.com>
Date: Sat, 30 Sep 2023 16:04:11 +0300
Subject: [PATCH 20/25] Add build_scoring_matrix in main func

---
 HW4_Trofimov/das_protein_tools.py | 38 +++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index cb90b5b..09da19c 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -3,8 +3,28 @@
 from random import choice
 
 
-AMINO_LETTERS = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
-                 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
+AMINO_LETTERS = [
+    "A",
+    "C",
+    "D",
+    "E",
+    "F",
+    "G",
+    "H",
+    "I",
+    "K",
+    "L",
+    "M",
+    "N",
+    "P",
+    "Q",
+    "R",
+    "S",
+    "T",
+    "V",
+    "W",
+    "Y",
+]
 
 
 # Function to determine is the sequence is a protein or not
@@ -289,20 +309,24 @@ def translate_protein_rna(seq: str) -> str:
 def main(*args):
     action = args[-1]
     action_list = {
-        "calculate_pI": calculate_pI,
-        "build_scoring_matrix": build_scoring_matrix,
+        "get_pI": get_pI,
         "needleman_wunsch": needleman_wunsch,
+        "build_scoring_matrix": build_scoring_matrix,
         "calculate_aa_freq": calculate_aa_freq,
         "translate_protein_rna": translate_protein_rna,
         "convert_to_3L_code": convert_to_3L_code,
-        "protein_mass": protein_mass
+        "protein_mass": protein_mass,
     }
 
     if action not in action_list:
         raise ValueError(f"No such action: {action}")
 
-    if not (action == "needleman_wunsch" and len(args) == 3 or
-            action != "needleman_wunsch" and len(args) == 2):
+    if not (
+        action == "needleman_wunsch"
+        and len(args) == 3
+        or action != "needleman_wunsch"
+        and len(args) == 2
+    ):
         raise ValueError("Error in number of sequences")
 
     for sequence in args[:-1]:

From fcef1e43a1b939b913b8b65edfe68b2ac25d138e Mon Sep 17 00:00:00 2001
From: Michil Trofimov <trofimov.michil@gmail.com>
Date: Sat, 30 Sep 2023 16:19:34 +0300
Subject: [PATCH 21/25] Fix bugs calculate_aa_freq

---
 HW4_Trofimov/das_protein_tools.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index 09da19c..334e93d 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -219,7 +219,7 @@ def needleman_wunsch(
 
 
 # Function to calculate frequency of unique aminoacid in the sequence
-def calculate_aa_freq(seq: str) -> dict:
+def calculate_aa_freq(sequences: str) -> dict:
     """
     Calculates the frequency of each amino acid in a protein sequence or sequences.
 
@@ -228,7 +228,6 @@ def calculate_aa_freq(seq: str) -> dict:
     :return: dictionary with the frequency of each amino acid
     :rtype: dict
     """
-    sequences = ""
 
     # Creating a dictionary with aminoacid frequencies:
     amino_acid_frequency = {}

From f045f2305d69fb09629c41785feeec9f930f2bf9 Mon Sep 17 00:00:00 2001
From: michtrofimov <92677906+michtrofimov@users.noreply.github.com>
Date: Sat, 30 Sep 2023 16:26:35 +0300
Subject: [PATCH 22/25] Update README.md

---
 HW4_Trofimov/README.md | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/HW4_Trofimov/README.md b/HW4_Trofimov/README.md
index b201973..d80ff30 100644
--- a/HW4_Trofimov/README.md
+++ b/HW4_Trofimov/README.md
@@ -7,26 +7,46 @@ Das biotools strikes again! Now it works only with aminoacid sequences!
 
 ## Features
 
-- **calculate_pI()**: Calculate the isoelectric point of a given amino acid sequence, both individually for each amino acid and for the entire sequence.
+- **get_pI()**: Calculate the isoelectric point of a given amino acid sequence, both individually for each amino acid and for the entire sequence.
 
-- **build_scoring_matri())**: Build a scoring matrix for amino acid pairs, which can be used in sequence alignment algorithms.
+- **build_scoring_matri()**: Build a scoring matrix for amino acid pairs, which can be used in sequence alignment algorithms.
 
 - **needleman_wunsch()**: Implement the Needleman-Wunsch algorithm for global sequence alignment of two amino acid sequences.
 
 - **convert_to_3L_code()**: Converts one letter animoacid sequence to three letter aminoacid sequence.
 
+- **protein_mass()**: Calculates molecular weight of the aminoacid sequence using monoisotopic masses.
+
+- **translate_protein_rna()**: Converts aminoacid sequence to RNA sequence. For those aminoacids that are coded with more than one codon, this function randomly chooses one codon from the set.
+
+## Examples
+
+- **get_pI**
+  
+```python 
+get_pI('RAHP') -> "Sequence: RAHP. Isoelectric point of each aminoacid: [('R', 10.8), ('A', 6.0), ('H', 7.6), ('P', 6.3)]"
+```
+
+- **needleman_wunsch**
+
+```python 
+needleman_wunsch('raHP','RAQQHP') -> 'ra--HP, RAQQHP, final score: 2'
+```
+
+- **convert_to_3L_code**
+
 ```python 
 convert_to_3L_code('ACDEF') -> 'Ala-Cys-Asp-Glu-Phe'
 ```
 
-- **protein_mass()**: Calculates molecular weight of the aminoacid sequence using monoisotopic masses.
+- **protein_mass**
 
 ```python 
 protein_mass('ACDEF') -> 565.184
 ```
 
-- **translate_protein_rna()**: Converts aminoacid sequence to RNA sequence. For those aminoacids that are coded with more than one codon, this function randomly chooses one codon from the set.
+- **translate_protein_rna**
 
 ```python 
 translate_protein_rna('ACDEF') -> 'GCCUGCGACGAGUUC'
-```
\ No newline at end of file
+```

From 1b18fb7f2ed01a33b16a9ecc455b1a64b453aac4 Mon Sep 17 00:00:00 2001
From: michtrofimov <92677906+michtrofimov@users.noreply.github.com>
Date: Sat, 30 Sep 2023 16:30:49 +0300
Subject: [PATCH 23/25] Update README.md

---
 HW4_Trofimov/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HW4_Trofimov/README.md b/HW4_Trofimov/README.md
index d80ff30..b98e466 100644
--- a/HW4_Trofimov/README.md
+++ b/HW4_Trofimov/README.md
@@ -7,7 +7,7 @@ Das biotools strikes again! Now it works only with aminoacid sequences!
 
 ## Features
 
-- **get_pI()**: Calculate the isoelectric point of a given amino acid sequence, both individually for each amino acid and for the entire sequence.
+- **get_pI()**: Gives isoelectric point value for each aminoacid individually.
 
 - **build_scoring_matri()**: Build a scoring matrix for amino acid pairs, which can be used in sequence alignment algorithms.
 

From 8edf4b5ada925dd1ae53e56e937600e14ad8d2ae Mon Sep 17 00:00:00 2001
From: michtrofimov <92677906+michtrofimov@users.noreply.github.com>
Date: Sat, 30 Sep 2023 16:42:08 +0300
Subject: [PATCH 24/25] Update README.md

---
 HW4_Trofimov/README.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/HW4_Trofimov/README.md b/HW4_Trofimov/README.md
index b98e466..c2660a9 100644
--- a/HW4_Trofimov/README.md
+++ b/HW4_Trofimov/README.md
@@ -9,7 +9,7 @@ Das biotools strikes again! Now it works only with aminoacid sequences!
 
 - **get_pI()**: Gives isoelectric point value for each aminoacid individually.
 
-- **build_scoring_matri()**: Build a scoring matrix for amino acid pairs, which can be used in sequence alignment algorithms.
+- **build_scoring_matrix()**: Auxiliary function for needleman_wunsch. Build a scoring matrix for amino acid pairs, which can be used in sequence alignment algorithms.
 
 - **needleman_wunsch()**: Implement the Needleman-Wunsch algorithm for global sequence alignment of two amino acid sequences.
 
@@ -50,3 +50,11 @@ protein_mass('ACDEF') -> 565.184
 ```python 
 translate_protein_rna('ACDEF') -> 'GCCUGCGACGAGUUC'
 ```
+
+## OUR TEAM
+<img width="800" alt="Screenshot 2023-09-30 at 16 34 25" src="https://github.com/michtrofimov/HW4_Functions2/assets/92677906/38fcc288-2d27-445d-b1dc-a3b055099a26">
+
+Top to bottom, left to right:
+- Alisa Fedorenko: functions **main**, **calculate_aa_freq**
+- Michil Trofimov: functions **get_pI**, **needleman_wunsch** (teamlead)
+- Shakir Suleimanov: functions **convert_to_3L_code**, **protein_mass**, **translate_protein_rna**

From e52a38e0fbf00042f57c700a3323b3817a10e63d Mon Sep 17 00:00:00 2001
From: Michil Trofimov <trofimov.michil@gmail.com>
Date: Sat, 30 Sep 2023 16:45:28 +0300
Subject: [PATCH 25/25] Add docstring to main function

---
 HW4_Trofimov/das_protein_tools.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/HW4_Trofimov/das_protein_tools.py b/HW4_Trofimov/das_protein_tools.py
index 334e93d..3ae64ed 100644
--- a/HW4_Trofimov/das_protein_tools.py
+++ b/HW4_Trofimov/das_protein_tools.py
@@ -305,7 +305,31 @@ def translate_protein_rna(seq: str) -> str:
         raise ValueError("Sequence is not a protein, input should be a protein")
 
 
-def main(*args):
+def main(*args: str):
+    """
+    Main function to perform various actions on protein sequences.
+
+    Args:
+    - *args: Variable number of arguments. The first n-1 arguments should be protein sequences,
+             and the last argument should be a string specifying the action to be performed.
+
+    Returns:
+    - The result of the specified action on the input protein sequences.
+
+    Raises:
+    - ValueError: If the specified action is not supported or if there is an error in the number of sequences.
+                  Also raised if the input sequences are not valid protein sequences.
+
+    Supported Actions:
+    - "get_pI": Calculate isoelectric points for each amino acid in the sequence.
+    - "needleman_wunsch": Perform global alignment of two sequences using the Needleman-Wunsch algorithm.
+    - "build_scoring_matrix": Build a scoring matrix for amino acid pairs.
+    - "calculate_aa_freq": Calculate the frequency of each amino acid in a protein sequence.
+    - "translate_protein_rna": Translate amino acid sequence to RNA, using random codons for each amino acid.
+    - "convert_to_3L_code": Convert one-letter amino acid sequence to three-letter coding.
+    - "protein_mass": Calculate the molecular weight of the protein sequence.
+    """
+
     action = args[-1]
     action_list = {
         "get_pI": get_pI,