-
Notifications
You must be signed in to change notification settings - Fork 4
Open
Description
Updating the local NG-MAST database via the update function fails because NG-MAST is now hosted on pubMLST.org and the original website is down.
Here is a function I wrote that updates NG-MAST for ngmaster successfully.
import subprocess, os
import pandas as pd
from Bio import SeqIO
def updateNGMAST(dbdir):
    """
    Download the NG-MAST alleles and profiles from pubMLST and install them
    in the formats ngmaster expects.

    The porB and tbpB allele FASTA files and the profile table are fetched
    with ``wget`` into ``<dbdir>/alleledb/ngmast`` as ``*.new`` files. All
    three downloads are converted in memory before anything is replaced; the
    current ``ng_mast.txt``, ``POR.tfa`` and ``TBPB.tfa`` are then renamed to
    ``*.old`` and the converted data is written in their place. Finally every
    ``*.new`` file in that directory is removed.

    Parameters
    ----------
    dbdir : PATH
        Path to database directory.

    Returns
    -------
    None.

    Raises
    ------
    subprocess.CalledProcessError
        If a ``wget`` download exits with a non-zero status. The existing
        database files have not been renamed or overwritten at that point.
    """
    ngmast_dir = os.path.join(dbdir, "alleledb", "ngmast")
    rest_base = "https://rest.pubmlst.org/db/pubmlst_neisseria_seqdef"

    por_new = os.path.join(ngmast_dir, "POR.tfa.new")
    tbpb_new = os.path.join(ngmast_dir, "TBPB.tfa.new")
    profiles_new = os.path.join(ngmast_dir, "ng_mast.txt.new")

    # Download new allele files and profile from pubMLST.org
    _download(rest_base + "/loci/NG-MAST_porB/alleles_fasta", por_new)
    _download(rest_base + "/loci/NG-MAST_tbpB/alleles_fasta", tbpb_new)
    _download(rest_base + "/schemes/71/profiles_csv", profiles_new)

    # Convert everything first, so that a malformed download raises before
    # the current database files have been moved out of the way.
    profiles = pd.read_csv(profiles_new, sep="\t")
    profiles = profiles.rename(
        columns={"NG-MAST_porB": "POR", "NG-MAST_tbpB": "TBPB"}
    ).set_index("ST")
    por_records = _read_renamed_alleles(por_new, "POR")
    tbpb_records = _read_renamed_alleles(tbpb_new, "TBPB")

    # Backup current ng_mast.txt and write the converted one
    profiles_path = os.path.join(ngmast_dir, "ng_mast.txt")
    os.rename(profiles_path, profiles_path + ".old")
    profiles.to_csv(profiles_path)

    # Backup current tfa files and write the converted ones. Each locus is
    # written from its own record list (TBPB.tfa must not receive POR records).
    for filename, records in (("POR.tfa", por_records), ("TBPB.tfa", tbpb_records)):
        target = os.path.join(ngmast_dir, filename)
        os.rename(target, target + ".old")
        SeqIO.write(records, target, "fasta-2line")

    # Clean up after update: remove every non-hidden "*.new" entry, which is
    # what the shell glob "*.new" would match.
    for entry in os.listdir(ngmast_dir):
        if entry.endswith(".new") and not entry.startswith("."):
            os.remove(os.path.join(ngmast_dir, entry))


def _download(url, destination):
    """Fetch url into destination with wget, raising if wget fails."""
    # Argument list (no shell) so paths containing spaces or shell
    # metacharacters are passed through unchanged.
    subprocess.check_call(
        ["wget", "--no-check-certificate", "-O", destination, url]
    )


def _read_renamed_alleles(fasta_path, prefix):
    """Parse an allele FASTA and rename each record to prefix + allele number."""
    records = []
    for record in SeqIO.parse(fasta_path, "fasta"):
        # The allele number is taken from the third "_"-separated field of the
        # record id (presumably ids look like "NG-MAST_porB_1" -- confirm
        # against the pubMLST download).
        record.id = prefix + str(record.id).split("_")[2]
        # Blank name/description so only the new id ends up in the header.
        record.name = ""
        record.description = ""
        records.append(record)
    return records
    return
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels