Skip to content

Commit aa3d01f

Browse files
author
Jon Palmer
committed
add override for --augustus-species and --busco-lineage to train; #13
1 parent 63f3aac commit aa3d01f

File tree

5 files changed

+71
-62
lines changed

5 files changed

+71
-62
lines changed

CITATION.cff

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
cff-version: version = "25.6.20"
1+
cff-version: version = "25.6.22"
22
title: 'funannotate2: eukaryotic genome annotation'
33
message: >-
44
If you use this software, please cite it using the
@@ -17,5 +17,5 @@ keywords:
1717
- functional annotation
1818
- consensus gene models
1919
license: BSD-2-Clause
20-
version: version = "25.6.20"
21-
date-released: '2025-06-20'
20+
version: version = "25.6.22"
21+
date-released: '2025-06-22'

funannotate2/__main__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,18 @@ def train_subparser(subparsers):
338338
help="Max length for fasta headers",
339339
metavar="",
340340
)
341+
optional_args.add_argument(
342+
"--busco-lineage",
343+
dest="busco_lineage",
344+
help="BUSCO lineage to use, over-rides default auto selection",
345+
metavar="",
346+
)
347+
optional_args.add_argument(
348+
"--augustus-species",
349+
dest="augustus_species",
350+
help="Pre-trained augustus species to use as initial parameters for BUSCO, over-rides default auto selection",
351+
metavar="",
352+
)
341353
other_args = group.add_argument_group("Other arguments")
342354
other_args.add_argument(
343355
"-h",

funannotate2/config.py

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -184,15 +184,6 @@
184184
"genus": "Brugia",
185185
"species": "Brugia malayi",
186186
},
187-
"b_pseudomallei": {
188-
"superkingdom": "Bacteria",
189-
"phylum": "Pseudomonadota",
190-
"class": "Betaproteobacteria",
191-
"order": "Burkholderiales",
192-
"family": "Burkholderiaceae",
193-
"genus": "Burkholderia",
194-
"species": "Burkholderia pseudomallei",
195-
},
196187
"caenorhabditis": {
197188
"superkingdom": "Eukaryota",
198189
"kingdom": "Metazoa",
@@ -468,15 +459,6 @@
468459
"genus": "Eremothecium",
469460
"species": "Eremothecium gossypii",
470461
},
471-
"E_coli_K12": {
472-
"superkingdom": "Bacteria",
473-
"phylum": "Pseudomonadota",
474-
"class": "Gammaproteobacteria",
475-
"order": "Enterobacterales",
476-
"family": "Enterobacteriaceae",
477-
"genus": "Escherichia",
478-
"species": "Escherichia coli",
479-
},
480462
"fusarium_graminearum": {
481463
"superkingdom": "Eukaryota",
482464
"kingdom": "Fungi",
@@ -868,24 +850,6 @@
868850
"genus": "Solanum",
869851
"species": "Solanum lycopersicum",
870852
},
871-
"s_aureus": {
872-
"superkingdom": "Bacteria",
873-
"phylum": "Bacillota",
874-
"class": "Bacilli",
875-
"order": "Bacillales",
876-
"family": "Staphylococcaceae",
877-
"genus": "Staphylococcus",
878-
"species": "Staphylococcus aureus",
879-
},
880-
"s_pneumoniae": {
881-
"superkingdom": "Bacteria",
882-
"phylum": "Bacillota",
883-
"class": "Bacilli",
884-
"order": "Lactobacillales",
885-
"family": "Streptococcaceae",
886-
"genus": "Streptococcus",
887-
"species": "Streptococcus pneumoniae",
888-
},
889853
"strongylocentrotus_purpuratus": {
890854
"superkingdom": "Eukaryota",
891855
"kingdom": "Metazoa",
@@ -896,14 +860,6 @@
896860
"genus": "Strongylocentrotus",
897861
"species": "Strongylocentrotus purpuratus",
898862
},
899-
"sulfolobus_solfataricus": {
900-
"superkingdom": "Archaea",
901-
"phylum": "Thermoproteota",
902-
"class": "Thermoprotei",
903-
"order": "Sulfolobales",
904-
"family": "Sulfolobaceae",
905-
"genus": "Sulfolobus",
906-
},
907863
"tetrahymena": {
908864
"superkingdom": "Eukaryota",
909865
"phylum": "Ciliophora",
@@ -923,14 +879,6 @@
923879
"genus": "Theobroma",
924880
"species": "Theobroma cacao",
925881
},
926-
"thermoanaerobacter_tengcongensis": {
927-
"superkingdom": "Bacteria",
928-
"phylum": "Bacillota",
929-
"class": "Clostridia",
930-
"order": "Thermoanaerobacterales",
931-
"family": "Thermoanaerobacteraceae",
932-
"genus": "Thermoanaerobacter",
933-
},
934882
"toxoplasma": {
935883
"superkingdom": "Eukaryota",
936884
"phylum": "Apicomplexa",

funannotate2/train.py

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,33 @@
3333
get_odb_version,
3434
rename_gff_contigs,
3535
)
36+
from .config import augustus_species, busco_taxonomy
37+
38+
39+
def validate_augustus_species(species_name):
40+
"""
41+
Validate that the provided Augustus species is available in the config.
42+
43+
Parameters:
44+
- species_name (str): The Augustus species name to validate
45+
46+
Returns:
47+
- bool: True if valid, False otherwise
48+
"""
49+
return species_name in augustus_species
50+
51+
52+
def validate_busco_lineage(lineage_name):
53+
"""
54+
Validate that the provided BUSCO lineage is available in the config.
55+
56+
Parameters:
57+
- lineage_name (str): The BUSCO lineage name to validate
58+
59+
Returns:
60+
- bool: True if valid, False otherwise
61+
"""
62+
return lineage_name in busco_taxonomy
3663

3764

3865
def train(args):
@@ -93,12 +120,34 @@ def train(args):
93120
taxonomy = lookup_taxonomy(args.species)
94121
logger.info(f"Getting taxonomy information\n{json.dumps(taxonomy, indent=2)}")
95122

96-
# choose best augustus species based on taxonomy
97-
aug_species = choose_best_augustus_species(taxonomy)
98-
logger.info(f"Choosing best augustus species based on taxonomy: {aug_species}")
99-
100-
# choose best busco species
101-
busco_species = choose_best_busco_species(taxonomy)
123+
# validate and set augustus species
124+
if args.augustus_species:
125+
if not validate_augustus_species(args.augustus_species):
126+
logger.critical(f"Invalid Augustus species: {args.augustus_species}")
127+
logger.critical(
128+
f"Valid options are: {', '.join(sorted(augustus_species.keys()))}"
129+
)
130+
raise SystemExit(1)
131+
aug_species = args.augustus_species
132+
logger.info(f"Using user-specified Augustus species: {aug_species}")
133+
else:
134+
# choose best augustus species based on taxonomy
135+
aug_species = choose_best_augustus_species(taxonomy)
136+
logger.info(f"Choosing best augustus species based on taxonomy: {aug_species}")
137+
138+
# validate and set busco lineage
139+
if args.busco_lineage:
140+
if not validate_busco_lineage(args.busco_lineage):
141+
logger.critical(f"Invalid BUSCO lineage: {args.busco_lineage}")
142+
logger.critical(
143+
f"Valid options are: {', '.join(sorted(busco_taxonomy.keys()))}"
144+
)
145+
raise SystemExit(1)
146+
busco_species = args.busco_lineage
147+
logger.info(f"Using user-specified BUSCO lineage: {busco_species}")
148+
else:
149+
# choose best busco species
150+
busco_species = choose_best_busco_species(taxonomy)
102151
# pull the latest odb version from downloads link
103152
odb_version = get_odb_version(
104153
os.path.join(os.path.dirname(__file__), "downloads.json")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "funannotate2"
7-
version = "25.6.20"
7+
version = "25.6.22"
88
description = "Funannotate2: eukaryotic genome annotation pipeline"
99
readme = {file = "README.md", content-type = "text/markdown"}
1010
authors = [

0 commit comments

Comments
 (0)