Skip to content

www.ng-mast.net is down #34

@mschmerer

Description

@mschmerer

Updating the local NG-MAST database via the update function fails because NG-MAST is now hosted on pubMLST.org and the original website (www.ng-mast.net) is down.

Here is a function I wrote that updates NG-MAST for ngmaster successfully.

import subprocess, os
import pandas as pd
from Bio import SeqIO

def updateNGMAST(dbdir):
    """
    Download NG-MAST alleles and profiles from pubMLST and install them
    in the formats ngmaster expects.

    The new files are downloaded and parsed completely before the current
    database files are touched, so a failed or empty download never
    replaces a working database. Existing files are kept as ``*.old``
    backups.

    Parameters
    ----------
    dbdir : PATH
        Path to database directory. Files are read from and written to
        ``<dbdir>/alleledb/ngmast``.

    Returns
    -------
    None.

    Raises
    ------
    subprocess.CalledProcessError
        If one of the ``wget`` downloads exits with a non-zero status.
    ValueError
        If a downloaded allele file contains no sequences.

    """
    ngmast_dir = os.path.join(dbdir, "alleledb", "ngmast")
    base_url = "https://rest.pubmlst.org/db/pubmlst_neisseria_seqdef"

    # Final file name -> pubMLST REST endpoint it is downloaded from.
    sources = {
        "POR.tfa": base_url + "/loci/NG-MAST_porB/alleles_fasta",
        "TBPB.tfa": base_url + "/loci/NG-MAST_tbpB/alleles_fasta",
        "ng_mast.txt": base_url + "/schemes/71/profiles_csv",
    }
    final_paths = {name: os.path.join(ngmast_dir, name) for name in sources}
    new_paths = {name: path + ".new" for name, path in final_paths.items()}

    try:
        # Download new allele files and profile from pubMLST.org.
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact; check_call aborts on a failed download.
        # NOTE(review): --no-check-certificate disables TLS verification;
        # kept from the original, consider dropping it.
        for name, url in sources.items():
            subprocess.check_call(
                ["wget", "--no-check-certificate", "-O", new_paths[name], url]
            )

        # Convert everything to ngmaster's format *before* touching the
        # current database, so a parse error leaves it untouched.
        profiles = pd.read_csv(new_paths["ng_mast.txt"], sep="\t")
        profiles = profiles.rename(
            columns={"NG-MAST_porB": "POR", "NG-MAST_tbpB": "TBPB"}
        ).set_index("ST")
        alleles = {
            "POR.tfa": _read_renamed_alleles(new_paths["POR.tfa"], "POR"),
            "TBPB.tfa": _read_renamed_alleles(new_paths["TBPB.tfa"], "TBPB"),
        }
        for name, records in alleles.items():
            if not records:
                raise ValueError(
                    "No allele sequences found in download for " + name
                )

        # Backup current files (if present), then write the new ones.
        for path in final_paths.values():
            if os.path.exists(path):
                os.replace(path, path + ".old")

        profiles.to_csv(final_paths["ng_mast.txt"])
        for name, records in alleles.items():
            SeqIO.write(records, final_paths[name], "fasta-2line")
    finally:
        # Clean up the temporary downloads, also after a failure.
        for path in new_paths.values():
            if os.path.exists(path):
                os.remove(path)


def _read_renamed_alleles(fasta_path, prefix):
    """Parse a pubMLST allele FASTA, renaming each record to ``<prefix><number>``."""
    records = []
    for record in SeqIO.parse(fasta_path, "fasta"):
        # The allele number is the third "_"-separated field of the id
        # (the download endpoints are the NG-MAST_porB / NG-MAST_tbpB loci).
        record.id = prefix + str(record.id).split("_")[2]
        # Blank name/description so only the new id appears in the header.
        record.name = ""
        record.description = ""
        records.append(record)
    return records

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions