diff --git a/biobb_vs/docs/source/command_line.md b/biobb_vs/docs/source/command_line.md index 11aa9a0..072da78 100644 --- a/biobb_vs/docs/source/command_line.md +++ b/biobb_vs/docs/source/command_line.md @@ -140,7 +140,7 @@ Config parameters for this building block: * **ligand** (*string*): (None) Ligand to be found in the protein structure. If no ligand provided, the largest one will be selected, if more than one.. * **radius** (*number*): (5.0) Cut-off distance (Ångstroms) around ligand atoms to consider a protein atom as a binding site atom.. * **max_num_ligands** (*integer*): (15) Total number of superimposed ligands to be extracted from the identity cluster. For populated clusters, the restriction avoids to superimpose redundant structures. If 0, all ligands extracted will be considered.. -* **matrix_name** (*string*): (blosum62) Substitution matrices for use in alignments. . +* **matrix_name** (*string*): (BLOSUM62) Substitution matrices for use in alignments. . * **gap_open** (*number*): (-10.0) Gap open penalty.. * **gap_extend** (*number*): (-0.5) Gap extend penalty.. * **remove_tmp** (*boolean*): (True) Remove temporal files.. @@ -152,7 +152,7 @@ properties: gap_extend: -0.5 gap_open: -10.0 ligand: PGA - matrix_name: blosum62 + matrix_name: BLOSUM62 max_num_ligands: 15 radius: 5 @@ -167,7 +167,7 @@ bindingsite --config config_bindingsite.yml --input_pdb_path bindingsite.pdb --i { "properties": { "ligand": "PGA", - "matrix_name": "blosum62", + "matrix_name": "BLOSUM62", "gap_open": -10.0, "gap_extend": -0.5, "max_num_ligands": 15, diff --git a/biobb_vs/json_schemas/bindingsite.json b/biobb_vs/json_schemas/bindingsite.json index 328ca39..fac0f5b 100644 --- a/biobb_vs/json_schemas/bindingsite.json +++ b/biobb_vs/json_schemas/bindingsite.json @@ -99,210 +99,160 @@ }, "matrix_name": { "type": "string", - "default": "blosum62", + "default": "BLOSUM62", "wf_prop": false, "description": "Substitution matrices for use in alignments. 
", "enum": [ - "benner6", - "benner22", - "benner74", - "blosum100", - "blosum30", - "blosum35", - "blosum40", - "blosum45", - "blosum50", - "blosum55", - "blosum60", - "blosum62", - "blosum65", - "blosum70", - "blosum75", - "blosum80", - "blosum85", - "blosum90", - "blosum95", - "feng", - "fitch", - "genetic", - "gonnet", - "grant", - "ident", - "johnson", - "levin", - "mclach", - "miyata", - "nwsgappep", - "pam120", - "pam180", - "pam250", - "pam30", - "pam300", - "pam60", - "pam90", - "rao", - "risler", - "structure" + "BENNER22", + "BENNER6", + "BENNER74", + "BLASTN", + "BLASTP", + "BLOSUM45", + "BLOSUM50", + "BLOSUM62", + "BLOSUM80", + "BLOSUM90", + "DAYHOFF", + "FENG", + "GENETIC", + "GONNET1992", + "HOXD70", + "JOHNSON", + "JONES", + "LEVIN", + "MCLACHLAN", + "MDM78", + "MEGABLAST", + "NUC.4.4", + "PAM250", + "PAM30", + "PAM70", + "RAO", + "RISLER", + "SCHNEIDER", + "STR", + "TRANS" ], "property_formats": [ { - "name": "benner6", + "name": "BENNER22", "description": null }, { - "name": "benner22", + "name": "BENNER6", "description": null }, { - "name": "benner74", + "name": "BENNER74", "description": null }, { - "name": "blosum100", + "name": "BLASTN", "description": null }, { - "name": "blosum30", + "name": "BLASTP", "description": null }, { - "name": "blosum35", + "name": "BLOSUM45", "description": null }, { - "name": "blosum40", + "name": "BLOSUM50", "description": null }, { - "name": "blosum45", + "name": "BLOSUM62", "description": null }, { - "name": "blosum50", + "name": "BLOSUM80", "description": null }, { - "name": "blosum55", + "name": "BLOSUM90", "description": null }, { - "name": "blosum60", + "name": "DAYHOFF", "description": null }, { - "name": "blosum62", + "name": "FENG", "description": null }, { - "name": "blosum65", + "name": "GENETIC", "description": null }, { - "name": "blosum70", + "name": "GONNET1992", "description": null }, { - "name": "blosum75", + "name": "HOXD70", "description": null }, { - "name": "blosum80", + "name": "JOHNSON", 
"description": null }, { - "name": "blosum85", + "name": "JONES", "description": null }, { - "name": "blosum90", + "name": "LEVIN", "description": null }, { - "name": "blosum95", + "name": "MCLACHLAN", "description": null }, { - "name": "feng", + "name": "MDM78", "description": null }, { - "name": "fitch", + "name": "MEGABLAST", "description": null }, { - "name": "genetic", + "name": "NUC.4.4", "description": null }, { - "name": "gonnet", + "name": "PAM250", "description": null }, { - "name": "grant", + "name": "PAM30", "description": null }, { - "name": "ident", + "name": "PAM70", "description": null }, { - "name": "johnson", + "name": "RAO", "description": null }, { - "name": "levin", + "name": "RISLER", "description": null }, { - "name": "mclach", + "name": "SCHNEIDER", "description": null }, { - "name": "miyata", + "name": "STR", "description": null }, { - "name": "nwsgappep", - "description": null - }, - { - "name": "pam120", - "description": null - }, - { - "name": "pam180", - "description": null - }, - { - "name": "pam250", - "description": null - }, - { - "name": "pam30", - "description": null - }, - { - "name": "pam300", - "description": null - }, - { - "name": "pam60", - "description": null - }, - { - "name": "pam90", - "description": null - }, - { - "name": "rao", - "description": null - }, - { - "name": "risler", - "description": null - }, - { - "name": "structure", + "name": "TRANS", "description": null } ] diff --git a/biobb_vs/test/data/config/config_bindingsite.json b/biobb_vs/test/data/config/config_bindingsite.json index a79d28e..29a4535 100644 --- a/biobb_vs/test/data/config/config_bindingsite.json +++ b/biobb_vs/test/data/config/config_bindingsite.json @@ -1,7 +1,7 @@ { "properties": { "ligand": "PGA", - "matrix_name": "blosum62", + "matrix_name": "BLOSUM62", "gap_open": -10.0, "gap_extend": -0.5, "max_num_ligands": 15, diff --git a/biobb_vs/test/data/config/config_bindingsite.yml b/biobb_vs/test/data/config/config_bindingsite.yml index 
c00497c..ce8472d 100644 --- a/biobb_vs/test/data/config/config_bindingsite.yml +++ b/biobb_vs/test/data/config/config_bindingsite.yml @@ -2,6 +2,6 @@ properties: gap_extend: -0.5 gap_open: -10.0 ligand: PGA - matrix_name: blosum62 + matrix_name: BLOSUM62 max_num_ligands: 15 radius: 5 diff --git a/biobb_vs/utils/bindingsite.py b/biobb_vs/utils/bindingsite.py index fe5b28a..18a7e04 100755 --- a/biobb_vs/utils/bindingsite.py +++ b/biobb_vs/utils/bindingsite.py @@ -37,7 +37,7 @@ class BindingSite(BiobbObject): * **ligand** (*str*) - (None) Ligand to be found in the protein structure. If no ligand provided, the largest one will be selected, if more than one. * **radius** (*float*) - (5.0) [0.1~1000|0.1] Cut-off distance (Ångstroms) around ligand atoms to consider a protein atom as a binding site atom. * **max_num_ligands** (*int*) - (15) [0~1000|1] Total number of superimposed ligands to be extracted from the identity cluster. For populated clusters, the restriction avoids to superimpose redundant structures. If 0, all ligands extracted will be considered. - * **matrix_name** (*str*) - ("blosum62") Substitution matrices for use in alignments. Values: benner6, benner22, benner74, blosum100, blosum30, blosum35, blosum40, blosum45, blosum50, blosum55, blosum60, blosum62, blosum65, blosum70, blosum75, blosum80, blosum85, blosum90, blosum95, feng, fitch, genetic, gonnet, grant, ident, johnson, levin, mclach, miyata, nwsgappep, pam120, pam180, pam250, pam30, pam300, pam60, pam90, rao, risler, structure. + * **matrix_name** (*str*) - ("BLOSUM62") Substitution matrices for use in alignments. Values: BENNER22, BENNER6, BENNER74, BLASTN, BLASTP, BLOSUM45, BLOSUM50, BLOSUM62, BLOSUM80, BLOSUM90, DAYHOFF, FENG, GENETIC, GONNET1992, HOXD70, JOHNSON, JONES, LEVIN, MCLACHLAN, MDM78, MEGABLAST, NUC.4.4, PAM250, PAM30, PAM70, RAO, RISLER, SCHNEIDER, STR, TRANS. * **gap_open** (*float*) - (-10.0) [-1000~1000|0.1] Gap open penalty. 
* **gap_extend** (*float*) - (-0.5) [-1000~1000|0.1] Gap extend penalty. * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. @@ -49,7 +49,7 @@ class BindingSite(BiobbObject): from biobb_vs.utils.bindingsite import bindingsite prop = { 'ligand': 'PGA', - 'matrix_name': 'blosum62', + 'matrix_name': 'BLOSUM62', 'gap_open': -10.0, 'gap_extend': -0.5, 'max_num_ligands': 15, @@ -89,7 +89,7 @@ def __init__(self, input_pdb_path, input_clusters_zip, output_pdb_path, self.ligand = properties.get('ligand', None) self.radius = float(properties.get('radius', 5.0)) self.max_num_ligands = properties.get('max_num_ligands', 15) - self.matrix_name = properties.get('matrix_name', 'blosum62') + self.matrix_name = properties.get('matrix_name', 'BLOSUM62') self.gap_open = properties.get('gap_open', -10.0) self.gap_extend = properties.get('gap_extend', -0.5) self.properties = properties diff --git a/biobb_vs/utils/common.py b/biobb_vs/utils/common.py index 66f5405..cf2684a 100644 --- a/biobb_vs/utils/common.py +++ b/biobb_vs/utils/common.py @@ -91,7 +91,7 @@ def get_sequence_nucs(structure): return seq -def align_sequences(seqA, seqB, matrix_name='blosum62', gap_open=-10.0, gap_extend=-0.5): +def align_sequences(seqA, seqB, matrix_name='BLOSUM62', gap_open=-10.0, gap_extend=-0.5): """ Performs a global pairwise alignment between two sequences using the Needleman-Wunsch algorithm as implemented in Biopython. Returns the alignment and the residue mapping between both original sequences.