diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1c2d52b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.idea/* diff --git a/HW4_Sapozhnikov/README.md b/HW4_Sapozhnikov/README.md new file mode 100644 index 0000000..c347e60 --- /dev/null +++ b/HW4_Sapozhnikov/README.md @@ -0,0 +1,109 @@ +# HW 4. Functions 2 +> *This is the repo for the fourth homework of the BI Python 2023 course* + +### Prototool +`prototool.py` is a special script for working with polyaminoacid sequences + +*** + +### Overview +`prototool.py` includes 7 methods for processing polyaminoacid sequences. +`prototool.py` can be used for the following purposes: +- recoding 1-letter coded polyaminoacid sequences into 3-letter coded and vice versa; +- polyaminoacid sequences alignment with Smith-Waterman algorithm [^1]; +- finding possible RNA sequences for given polyaminoacid sequences; +- determining polyaminoacid isoelectric point; +- calculating polyaminoacid molecular weight; +- finding possible DNA sequences for given polyaminoacid sequences; +- determining GC-content of a corresponding DNA sequence to a given polyaminoacid sequence + +*** + +### Usage +This tool can be used both standalone and as a module. +- to use `prototool` standalone you will have to add these lines in the code + ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/5fa3cf7f-e6f3-4294-9e81-b1ebe17c8514) + - where *args are sequences you want to process and method is a specified algorithm to use + - your result will be written in a variable (test on a picture) +- to use `prototool` as a module (recommended) you should import it as any other module (check the path: prototool.py should be in the same directory as your script). Then you can freely use any of its functions (see examples). + +*** + +### Options +Arguments: +- `*args[str]` sequences to work with.
You can pass several arguments into all functions +- `method` - a method to use + +output: All functions return a dict, where keys are original sequences, values are results after using a corresponding method. + +*** + +### Examples + +def recode translates between 1-letter and 3-letter polyaminoacid codes +- `main('AlaValTyr', 'DNT', method = 'recode')` +- `recode('AlaValTyr', 'DNT')` +- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/117befa5-feaa-433a-9ac9-23cffe9b024f) +*** + +def local_alignment performs a local alignment of 2 given sequences. Needs at least two sequences to be passed +- `main('MetAsnTrp', 'MNT', method='local_alignment')` +- `local_alignment('MetAsnTrp', 'MNT')` +- Note that local_alignment function has a flag prettify (default = True) that prints out the aligned sequences one above the other +- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/4dd36d24-a177-4419-9053-a5e2923a980c) +*** + +def from_proteins_seqs_to_rna decodes polyaminoacid sequences into RNA sequences +- `main('AlaValTyr', 'DNT', method = 'from_proteins_seqs_to_rna')` +- `from_proteins_seqs_to_rna('AlaValTyr', 'DNT')` +- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/9ee92d0d-68a4-471b-b65a-2fa6b46ab844) +*** + +def isoelectric_point_determination determines the isoelectric point of polyaminoacid sequences +- `main('AlaValTyr', 'DNT', method = 'isoelectric_point_determination')` +- `isoelectric_point_determination('AlaValTyr', 'DNT')` +- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/24027a07-b20b-42d4-bb10-4ca7189038d4) +*** + +def back_transcribe decodes polyaminoacid sequences into DNA sequences +- `main('AlaValTyr', 'DNT', method = 'back_transcribe')` +- `back_transcribe('AlaValTyr', 'DNT')` +- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/71f07616-a37d-48da-9e63-82b81836b9d7) +*** + +def count_gc_content counts the ratio of GC
in the entire DNA sequence +- `main('AlaValTyr', 'DNT', method = 'count_gc_content')` +- `count_gc_content('AlaValTyr', 'DNT')` +- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/d2705714-a3e8-4054-8998-61d922a4feb6) +*** + +def count_protein_molecular_weight calculates the molecular weight of the polyaminoacid +- `main('AlaValTyr', 'DNT', method = 'count_protein_molecular_weight')` +- `count_protein_molecular_weight('AlaValTyr', 'DNT')` +- ![image](https://github.com/NSapozhnikov/HW4_Sapozhnikov/assets/81642791/cc1eff9a-1b39-4232-98e4-80f622101083) + +*** + +### Troubleshooting +If you have `ValueError("No input defined.")` it means that you have an empty input. Please enter the correct input. +*** +If you have `ValueError(method, " is not a valid method.")` it means that the method name you passed is not supported. Please pass one of the methods listed above. +*** +If you have `ValueError('Non-protein aminoacids in sequence')` it means that your sequences contain non-protein aminoacids. Please check your sequences and enter the correct input. + +*** + +### Contributions and contacts + +Feel free to report any bugs and problems encountered. +Email: nikita.sapozhnikov1@gmail.com developed recode(), prettify_alignment(), local_alignment(), check_input() +*** +nekrasovadasha22@mail.ru developed from_proteins_seqs_to_rna(), isoelectric_point_determination() +*** +alina.potyseva@gmail.com developed back_transcribe(), count_gc_content(), count_protein_molecular_weight() + +*** + +### References + +[^1]: T.F. Smith, M.S. Waterman, (1981). [Identification of common molecular subsequences](https://doi.org/10.1016/0022-2836(81)90087-5). Journal of Molecular Biology. diff --git a/HW4_Sapozhnikov/prototool.py b/HW4_Sapozhnikov/prototool.py new file mode 100644 index 0000000..dd2d851 --- /dev/null +++ b/HW4_Sapozhnikov/prototool.py @@ -0,0 +1,564 @@ +""" +This is a prototool. WE ARE SORRY!!!
from typing import List, Optional, Tuple, Union


# Reference data for the 20 proteinogenic amino acids, keyed by 3-letter code.
#   TO_1                       - the 1-letter code of the residue
#   PROTEIN_TO_RNA_COMBINATION - RNA codons encoding the residue
#   PKA_AMINOACIDS             - pKa values; the 3-element entries appear to be
#                                ordered [carboxyl, amino, side chain]
#   MOLECULAR_WEIGHTS          - rounded weight of the free amino acid
# NOTE(review): His carries no side-chain pKa here (textbooks list about 6.0);
# confirm whether that omission is intended before relying on pI values.
AMINOACIDS_DICT = {
    'Ala': {'TO_1': 'A',
            'PROTEIN_TO_RNA_COMBINATION': {'GCU', 'GCC', 'GCA', 'GCG'},
            'PKA_AMINOACIDS': [2.34, 9.69],
            'MOLECULAR_WEIGHTS': 89},
    'Arg': {'TO_1': 'R',
            'PROTEIN_TO_RNA_COMBINATION': {'CGU', 'CGC', 'CGA', 'CGG', 'AGA',
                                           'AGG'},
            'PKA_AMINOACIDS': [2.17, 9.04, 12.68],
            'MOLECULAR_WEIGHTS': 174},
    # Asn has an amide side chain that does not ionise, so it gets two pKa
    # values only (the previous entry duplicated the Asp values).
    'Asn': {'TO_1': 'N',
            'PROTEIN_TO_RNA_COMBINATION': {'AAU', 'AAC'},
            'PKA_AMINOACIDS': [2.02, 8.80],
            'MOLECULAR_WEIGHTS': 132},
    'Asp': {'TO_1': 'D',
            'PROTEIN_TO_RNA_COMBINATION': {'GAU', 'GAC'},
            'PKA_AMINOACIDS': [1.88, 9.60, 3.65],
            'MOLECULAR_WEIGHTS': 133},
    'Cys': {'TO_1': 'C',
            'PROTEIN_TO_RNA_COMBINATION': {'UGU', 'UGC'},
            'PKA_AMINOACIDS': [1.96, 10.28, 8.18],
            'MOLECULAR_WEIGHTS': 121},
    'Glu': {'TO_1': 'E',
            'PROTEIN_TO_RNA_COMBINATION': {'GAA', 'GAG'},
            'PKA_AMINOACIDS': [2.19, 9.67, 4.25],
            'MOLECULAR_WEIGHTS': 147},
    'Gln': {'TO_1': 'Q',
            'PROTEIN_TO_RNA_COMBINATION': {'CAA', 'CAG'},
            'PKA_AMINOACIDS': [2.17, 9.13],
            'MOLECULAR_WEIGHTS': 146},
    'Gly': {'TO_1': 'G',
            'PROTEIN_TO_RNA_COMBINATION': {'GGU', 'GGC', 'GGA', 'GGG'},
            'PKA_AMINOACIDS': [2.34, 9.60],
            'MOLECULAR_WEIGHTS': 75},
    'His': {'TO_1': 'H',
            'PROTEIN_TO_RNA_COMBINATION': {'CAU', 'CAC'},
            'PKA_AMINOACIDS': [1.82, 9.17],
            'MOLECULAR_WEIGHTS': 155},
    'Ile': {'TO_1': 'I',
            'PROTEIN_TO_RNA_COMBINATION': {'AUU', 'AUC', 'AUA'},
            'PKA_AMINOACIDS': [2.36, 9.68],
            'MOLECULAR_WEIGHTS': 131},
    'Leu': {'TO_1': 'L',
            'PROTEIN_TO_RNA_COMBINATION': {'UUA', 'UUG', 'CUU', 'CUC', 'CUA',
                                           'CUG'},
            'PKA_AMINOACIDS': [2.36, 9.60],
            'MOLECULAR_WEIGHTS': 131},
    'Lys': {'TO_1': 'K',
            'PROTEIN_TO_RNA_COMBINATION': {'AAA', 'AAG'},
            'PKA_AMINOACIDS': [2.18, 8.95, 10.53],
            'MOLECULAR_WEIGHTS': 146},
    'Met': {'TO_1': 'M',
            'PROTEIN_TO_RNA_COMBINATION': {'AUG'},
            'PKA_AMINOACIDS': [2.28, 9.21],
            'MOLECULAR_WEIGHTS': 149},
    'Phe': {'TO_1': 'F',
            'PROTEIN_TO_RNA_COMBINATION': {'UUU', 'UUC'},
            'PKA_AMINOACIDS': [2.20, 9.13],
            'MOLECULAR_WEIGHTS': 165},
    'Pro': {'TO_1': 'P',
            'PROTEIN_TO_RNA_COMBINATION': {'CCU', 'CCC', 'CCA', 'CCG'},
            'PKA_AMINOACIDS': [1.99, 10.96],
            'MOLECULAR_WEIGHTS': 115},
    'Ser': {'TO_1': 'S',
            'PROTEIN_TO_RNA_COMBINATION': {'UCU', 'UCC', 'UCA', 'UCG', 'AGU',
                                           'AGC'},
            'PKA_AMINOACIDS': [2.21, 9.15],
            'MOLECULAR_WEIGHTS': 105},
    'Thr': {'TO_1': 'T',
            'PROTEIN_TO_RNA_COMBINATION': {'ACU', 'ACC', 'ACA', 'ACG'},
            'PKA_AMINOACIDS': [2.11, 9.62],
            'MOLECULAR_WEIGHTS': 119},
    'Tyr': {'TO_1': 'Y',
            'PROTEIN_TO_RNA_COMBINATION': {'UAU', 'UAC'},
            'PKA_AMINOACIDS': [2.20, 9.11, 10.07],
            'MOLECULAR_WEIGHTS': 181},
    'Trp': {'TO_1': 'W',
            'PROTEIN_TO_RNA_COMBINATION': {'UGG'},
            'PKA_AMINOACIDS': [2.38, 9.39],
            'MOLECULAR_WEIGHTS': 204},
    'Val': {'TO_1': 'V',
            'PROTEIN_TO_RNA_COMBINATION': {'GUU', 'GUC', 'GUA', 'GUG'},
            'PKA_AMINOACIDS': [2.32, 9.62],
            'MOLECULAR_WEIGHTS': 117},
}

# A dictionary where keys are 1-letter and values are 3-letter codes.
# It is a true inverse only while every 'TO_1' value above is unique.
TO_3_DICT = {nested_dict['TO_1']: key
             for key, nested_dict in AMINOACIDS_DICT.items()}

# RNA base -> DNA base (same strand; only U/u differs), both letter cases.
TRANSCRIBE_DICT: dict = {'A': 'A',
                         'U': 'T',
                         'G': 'G',
                         'C': 'C',
                         'a': 'a',
                         'u': 't',
                         'g': 'g',
                         'c': 'c'}

# Method names accepted by check_input() and main().
_VALID_METHODS = ('recode',
                  'local_alignment',
                  'from_proteins_seqs_to_rna',
                  'isoelectric_point_determination',
                  'count_protein_molecular_weight',
                  'back_transcribe',
                  'count_gc_content')


def _split_into_residues(seq: str) -> List[str]:
    """
    Cut a 3-letter encoded sequence into residues and validate each of them.

    Args:
    - seq - 3-letter encoded sequence without separators (e.g. 'AlaSer')

    Returns:
    - list of 3-letter residue codes in sequence order

    Raises:
    - ValueError - if any chunk is not a key of AMINOACIDS_DICT
    """
    residues = [seq[start:start + 3] for start in range(0, len(seq), 3)]
    for residue in residues:
        if residue not in AMINOACIDS_DICT:
            raise ValueError('Non-protein aminoacids in sequence')
    return residues


def is_one_letter(seq: str) -> bool:
    """
    Defines whether the sequence is 1-letter coded.

    A sequence counts as 1-letter coded when every character is an
    uppercase letter; an empty string therefore also yields True.

    Args:
    - seq - sequence to check

    Returns:
    - bool
    """
    return all(aa.isalpha() and aa.isupper() for aa in seq)


def recode(seq: str) -> str:
    """
    Translate 1-letter to 3-letter encoding if 1-letter
    encoded sequence is given and vice versa.

    Args:
    - seq - sequence to recode

    Returns:
    - the recoded sequence as a string

    Raises:
    - KeyError - if a 3-letter chunk is not a known amino acid.
      Unknown 1-letter codes are copied to the output unchanged.
    """
    if is_one_letter(seq):
        # 1-letter -> 3-letter; unknown letters pass through untouched
        return "".join(TO_3_DICT.get(aa, aa) for aa in seq)
    # 3-letter -> 1-letter
    return "".join(AMINOACIDS_DICT[seq[start:start + 3]]['TO_1']
                   for start in range(0, len(seq), 3))


def prettify_alignment(aligned_seq_on: str, aligned_seq2: str) -> None:
    """
    Print two aligned sequences on top of each other, separated by a row
    of vertical bars as long as the second sequence.

    Args:
    - aligned_seq_on, aligned_seq2 - sequences
    from the local_alignment()

    Returns:
    None; the prettified view is written to stdout
    """
    print(aligned_seq_on)
    print('|' * len(aligned_seq2))
    print(aligned_seq2)


def local_alignment(seq_on: str,
                    seq2: str,
                    alignment_dict: dict,
                    seq_id: int,
                    match=2,
                    mismatch=-1,
                    gap=-1,
                    prettify: bool = True) -> dict:
    """
    Perform a local (Smith-Waterman) alignment of 2 given sequences.

    Args:
    - seq_on - the sequence to align onto
    - seq2 - sequence to align
    - alignment_dict - a dictionary to collect alignment results;
    it is updated in place and also returned
    - seq_id - index of seq2 in the caller's list; the result is stored
    under the key f'aligned_seq{seq_id+1}'
    - match, mismatch, gap - alignment scoring and penalty values
    defaulted to 2, -1, -1
    - prettify - if True (default) prints out the prettified version
    of sequences aligned on top of each other

    Returns:
    - alignment_dict with 'aligned_seq_on' and f'aligned_seq{seq_id+1}'
    set; the latter holds 'seq', 'length', 'score', 'identity' and 'gaps'.
    When nothing aligns, both sequences are empty and identity is 0.0.
    """
    rows, cols = len(seq_on) + 1, len(seq2) + 1

    # Score matrix and the move that produced each cell (None = stop)
    score_matrix = [[0] * cols for _ in range(rows)]
    traceback_matrix = [[None] * cols for _ in range(rows)]

    alignment_score = 0  # best score seen so far
    max_i, max_j = 0, 0  # cell holding the best score

    for i in range(1, rows):
        for j in range(1, cols):
            step = match if seq_on[i - 1] == seq2[j - 1] else mismatch
            diagonal_score = score_matrix[i - 1][j - 1] + step
            delete_score = score_matrix[i - 1][j] + gap
            insert_score = score_matrix[i][j - 1] + gap

            score = max(0, diagonal_score, delete_score, insert_score)
            score_matrix[i][j] = score

            if score > alignment_score:
                alignment_score = score
                max_i, max_j = i, j

            # A zero cell ends a local alignment, so it must never point
            # anywhere even if one of the candidate scores is also zero.
            if score == 0:
                continue
            if score == diagonal_score:
                traceback_matrix[i][j] = "diagonal"
            elif score == delete_score:
                traceback_matrix[i][j] = "delete"
            else:
                traceback_matrix[i][j] = "insert"

    # Traceback from the best cell until a stop cell or a matrix border
    aligned_on_chars = []
    aligned_2_chars = []
    counter_identity: int = 0
    counter_gaps: int = 0

    i, j = max_i, max_j
    while i > 0 and j > 0:
        move = traceback_matrix[i][j]
        if move == "diagonal":
            aligned_on_chars.append(seq_on[i - 1])
            aligned_2_chars.append(seq2[j - 1])
            # a diagonal step is an identity only when the residues agree
            if seq_on[i - 1] == seq2[j - 1]:
                counter_identity += 1
            i -= 1
            j -= 1
        elif move == "delete":
            aligned_on_chars.append(seq_on[i - 1])
            aligned_2_chars.append("-")
            counter_gaps += 1
            i -= 1
        elif move == "insert":
            aligned_on_chars.append("-")
            aligned_2_chars.append(seq2[j - 1])
            counter_gaps += 1
            j -= 1
        else:
            break

    # The traceback collects characters back to front
    aligned_seq_on = "".join(reversed(aligned_on_chars))
    aligned_seq2 = "".join(reversed(aligned_2_chars))

    # Both aligned strings always have the same length
    alignment_length = len(aligned_seq_on)
    if alignment_length:
        identity = round(counter_identity / alignment_length, 4)
    else:
        identity = 0.0

    alignment_dict['aligned_seq_on'] = aligned_seq_on
    alignment_dict[f'aligned_seq{seq_id+1}'] = {'seq': aligned_seq2,
                                                'length': alignment_length,
                                                'score': alignment_score,
                                                'identity': identity,
                                                'gaps': counter_gaps}

    if prettify:
        prettify_alignment(aligned_seq_on, aligned_seq2)

    return alignment_dict


def count_protein_molecular_weight(*seqs_list: Union[List[str], str]) -> dict:
    """
    Sum the tabulated weights of the residues of each sequence.

    NOTE(review): the table holds free amino acid weights and no water is
    subtracted per peptide bond - confirm this is the intended definition.

    Args:
    - seqs_list - 3-letter encoded sequences without whitespace
    (e.g. 'AlaSer'). You can put as many sequences as you wish.

    Returns:
    - a dictionary with input sequences as keys and weights as values

    Raises:
    - ValueError - if a sequence contains an unknown residue
    """
    result = {}
    for seq in seqs_list:
        result[seq] = sum(AMINOACIDS_DICT[residue]['MOLECULAR_WEIGHTS']
                          for residue in _split_into_residues(seq))
    return result


def from_proteins_seqs_to_rna(*seqs_list: Union[List[str], str]) -> dict:
    """
    Build one possible RNA sequence for each protein sequence.

    For every residue the alphabetically first codon is taken, so the
    result is the same on every run (iterating the codon sets directly
    would depend on string hash randomisation).

    Args:
    - seqs_list - 3-letter encoded sequences such as 'ValTyrAla'.
    You can pass more than one sequence at a time.

    Returns:
    - a dictionary with input sequences as keys and RNA strings as values

    Raises:
    - ValueError - if a sequence contains an unknown residue
    """
    answer_dictionary = {}
    for seq in seqs_list:
        answer_dictionary[seq] = "".join(
            min(AMINOACIDS_DICT[residue]['PROTEIN_TO_RNA_COMBINATION'])
            for residue in _split_into_residues(seq))
    return answer_dictionary


def isoelectric_point_determination(*seqs_list: Union[List[str], str]) -> dict:
    """
    Estimate the isoelectric point of each sequence as a mean of pKa values.

    The first residue contributes its first pKa, the last residue its last
    pKa, and every inner residue that has more than two pKa values
    contributes its second one. A single-residue sequence yields its
    first pKa.

    Args:
    - seqs_list - 3-letter encoded sequences such as 'ValTyrAla'.
    You can pass more than one sequence at a time.

    Returns:
    - a dictionary with input sequences as keys and the estimated
    isoelectric points as values

    Raises:
    - ValueError - if a sequence is empty or contains an unknown residue
    """
    answer_dictionary = {}

    for aminoacids in seqs_list:
        divided_acids = _split_into_residues(aminoacids)
        if not divided_acids:
            # nothing to average; would otherwise divide by zero below
            raise ValueError('No input defined.')

        last_index = len(divided_acids) - 1
        pka_sum = 0
        count_groups = 0
        for acid_index, aminoacid in enumerate(divided_acids):
            pka_values = AMINOACIDS_DICT[aminoacid]['PKA_AMINOACIDS']
            if acid_index == 0:
                pka_sum += pka_values[0]
            elif acid_index == last_index:
                pka_sum += pka_values[-1]
            elif len(pka_values) > 2:
                pka_sum += pka_values[1]
            else:
                continue
            count_groups += 1
        answer_dictionary[aminoacids] = pka_sum / count_groups
    return answer_dictionary


def back_transcribe(*seqs_list: Union[List[str], str]) -> dict:
    """
    Build one possible DNA sequence for each protein sequence.

    Args:
    - seqs_list - 3-letter encoded sequences without whitespace.
    You can put as many sequences as you wish.

    Returns:
    - a dictionary with input sequences as keys and DNA strings as values

    Raises:
    - ValueError - if a sequence contains an unknown residue
    """
    result = {}
    for seq in seqs_list:
        rna = from_proteins_seqs_to_rna(seq)[seq]
        result[seq] = "".join(TRANSCRIBE_DICT.get(base, base) for base in rna)
    return result


def count_gc_content(*seqs_list: Union[List[str], str]) -> dict:
    """
    Compute the GC-content of the DNA built by back_transcribe().

    Args:
    - seqs_list - 3-letter encoded sequences without whitespace.
    You can put as many sequences as you wish.

    Returns:
    - a dictionary with input sequences as keys and the GC percentage
    rounded to an integer as values; an empty sequence yields 0

    Raises:
    - ValueError - if a sequence contains an unknown residue
    """
    result = {}
    for seq in seqs_list:
        dna = back_transcribe(seq)[seq]
        if not dna:
            result[seq] = 0
            continue
        gc_count = dna.count('G') + dna.count('C')
        result[seq] = round(100 * gc_count / len(dna))
    return result


def check_input(*args: Union[List[str], str], method: str) -> \
        Tuple[List[str], Optional[str]]:
    """
    Function to check the validity of the input.

    Sequences are converted to the encoding the chosen method expects:
    1-letter for 'local_alignment', 3-letter for every other method except
    'recode', whose input is left untouched because recode() accepts both
    encodings and converting here would make main() undo the recoding.
    Each conversion is reported on stdout.

    Args:
    - *args - are supposed to be all sequences to process
    - method - the method to process with

    Returns:
    - seqs_list - list of sequences
    - seq_on (optional) - the first sequence, removed from seqs_list,
    in case of local_alignment method; None otherwise

    Raises:
    - ValueError - if no sequences are given or method is unknown
    - IndexError - if local_alignment gets fewer than two sequences
    """
    if not args:
        raise ValueError('No input defined.')
    if method not in _VALID_METHODS:
        raise ValueError(method, ' is not a valid method.')

    seqs_list = list(args)

    if method == 'recode':
        return seqs_list, None

    if method == 'local_alignment':
        if len(seqs_list) < 2:
            raise IndexError('Need at least two sequences to align.')
        for i, seq in enumerate(seqs_list):
            if not is_one_letter(seq):
                print('Warning! Function local_alignment() needs '
                      '1-letter encoded sequences. Your sequence '
                      'will be mutated to a 1-letter encoding.')
                seqs_list[i] = recode(seq)
                print(seq, ' sequence has been mutated into: ',
                      seqs_list[i])
        seq_on = seqs_list.pop(0)
        return seqs_list, seq_on

    for i, seq in enumerate(seqs_list):
        if is_one_letter(seq):
            print(f'Warning! Function {method}() needs '
                  '3-letter encoded sequences. Your sequence '
                  'will be mutated to a 3-letter encoding.')
            seqs_list[i] = recode(seq)
            print(seq, ' sequence has been mutated into: ',
                  seqs_list[i])
    return seqs_list, None


# Methods that take the validated sequences as-is and return a result dict.
_SEQUENCE_METHODS = {
    'from_proteins_seqs_to_rna': from_proteins_seqs_to_rna,
    'count_protein_molecular_weight': count_protein_molecular_weight,
    'isoelectric_point_determination': isoelectric_point_determination,
    'back_transcribe': back_transcribe,
    'count_gc_content': count_gc_content,
}


def main(*args: Tuple[Union[List[str], str]],
         method: Optional[str] = None) -> dict:
    """
    This function provides the access to the following methods:

    1. 'recode' - translate 1-letter to 3-letter encoding and vice versa
        - needs at least 1 sequence, 1- or 3-letter encoded
        - returns a dictionary mapping every input sequence to its
        translation

    2. 'local_alignment' - local alignment (Smith-Waterman algorithm)
        - needs at least 2 protein sequences. When more than 2 sequences
        are passed, the first one is used to align the rest onto
        - returns the dictionary filled by local_alignment()

    3. 'from_proteins_seqs_to_rna' - a possible RNA sequence
    4. 'isoelectric_point_determination' - isoelectric point
    5. 'count_protein_molecular_weight' - molecular weight
    6. 'back_transcribe' - a possible DNA sequence
    7. 'count_gc_content' - GC percentage of that DNA sequence
        - methods 3-7 need at least 1 protein sequence; 1-letter encoded
        input is converted to 3-letter encoding first, and the result is
        keyed by the converted sequence

    The sequences after validation and the chosen method are printed
    to stdout before the method runs.

    Args:
    - *args - are supposed to be all sequences to process
    - method is a kwarg - the method to process with.

    Returns:
    function_result - a dictionary with the result of a chosen function

    Raises:
    - whatever check_input() and the chosen method raise
    """
    seqs_list, seq_on = check_input(*args, method=method)
    print(f'Your sequences are: {seqs_list}',
          f'The method is: {method}', sep='\n')

    if method == 'recode':
        return {seq: recode(seq=seq) for seq in seqs_list}

    if method == 'local_alignment':
        alignment_dict: dict = {}
        for seq_id, seq in enumerate(seqs_list):
            local_alignment(seq_on=seq_on,
                            seq2=seq,
                            alignment_dict=alignment_dict,
                            seq_id=seq_id,
                            prettify=True)
        return alignment_dict

    return _SEQUENCE_METHODS[method](*seqs_list)