diff --git a/Dockerfile b/Dockerfile index 85ed3fc..1d23b06 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,97 +1,15 @@ -FROM kbase/kbase:sdkbase2.latest +FROM kbase/sdkbase2:python MAINTAINER KBase Developer -# ----------------------------------------- -# In this section, you can install any system dependencies required -# to run your App. For instance, you could place an apt-get update or -# install line here, a git checkout to download code, or run any other -# installation scripts. - -## upgrade python3 to v3.7 -#RUN apt update -#RUN echo "Y" | apt install software-properties-common -#RUN add-apt-repository ppa:deadsnakes/ppa -#RUN apt update -#RUN echo "Y" | apt install python3.7 - - -# update package managers -#RUN apt-get update -#RUN apt-get update && apt-get install -y python3-pip -#RUN apt-get update && apt-get install -y python=3.6 python3-pip -#RUN pip install --upgrade pip - # install cython that pysam likes -RUN apt-get update && apt-get install -y build-essential +RUN apt-get update && apt-get install -y build-essential wget RUN pip install --upgrade pip setuptools Cython==0.25 - -# Here we install a python coverage tool and an -# https library that is out of date in the base image. - -#RUN pip install coverage - -# Hope to solve the "Could not find .egg-info directory in install record for checkm-genome, etc." -#RUN pip install --upgrade setuptools - -## update security libraries in the base image -#RUN pip install cffi --upgrade \ -# && pip install pyopenssl --upgrade \ -# && pip install ndg-httpsclient --upgrade \ -# && pip install pyasn1 --upgrade \ -# && pip install requests --upgrade \ -# && pip install 'requests[security]' --upgrade - - - ###### CheckM installation -# Directions from https://github.com/Ecogenomics/CheckM/wiki/Installation#how-to-install-checkm -#System requirements -# -#CheckM is designed to run on Linux. The limiting requirement for CheckM is memory. Inference of lineage-specific marker sets using the full reference genome tree required approximately 40 GB of memory. However, a reduced genome tree (--reduced_tree) can also be used to infer lineage-specific marker sets which is suitable for machines with as little as 16 GB of memory. We recommend using the full tree if possible, though our results suggest that the same lineage-specific marker set will be selected for the vast majority of genomes regardless of the underlying reference tree. System requirements are far more modest if you plan to make use of taxonomic-specific marker sets or your own custom marker genes as this bypasses the need to place genomes in the reference genome tree. -# -#How to install CheckM -# -#CheckM requires the following programs to be added to your system path: -# -#HMMER (>=3.1b1) -#prodigal (2.60 or >=2.6.1) -#executable must be named prodigal and not prodigal.linux -#pplacer (>=1.1) -#guppy, which is part of the pplacer package, must also be on your system path -#pplacer binaries can be found on the pplacer GitHub page -#CheckM is a Python 2.x program and we recommend installing it through pip: -# -#> sudo pip install numpy -#> sudo pip install checkm-genome -# -#This will install CheckM and all other required Python libraries. -# -#CheckM relies on a number of precalculated data files. To install these run: -# -#> sudo checkm data update -# -#This will prompt you for an installation directory for the required data files. You can update the data files in the future by re-running this command. If you are unable to automatically download these files (e.g., you are behind a proxy), the files can be manually downloaded from https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_v1.0.7.tar.gz. Decompress this file to an appropriate folder and run checkm data setRoot to inform CheckM of where the files have been placed. -# -#CheckM is now ready to run. For a list of CheckM commands type: -# -#> checkm -# -#If desired, you can also download the latest release of CheckM and install it manually. CheckM makes use of the following Python libraries: -# -#python >= 2.7 and < 3.0 -#numpy >= 1.8.0 -#scipy >= 0.9.0 -#matplotlib >= 1.3.1 -#pysam >= 0.8.3 -#dendropy >= 4.0.0 -#ScreamingBackpack >= 0.2.3 - -# -#### OK, got that cleared up. Now install CheckM, but not data -# +# Directions available here: https://github.com/Ecogenomics/CheckM/wiki/Installation#how-to-install-checkm +# Now install CheckM, but not data # Install HMMER WORKDIR /kb/module @@ -119,23 +37,11 @@ RUN \ #ENV PATH "$PATH:/kb/development/bin/prodigal" -# Install Pplacer -# NOTE: The following block is replaced by the following section because the need of installing -# opam and its respective dependencies has been a big hassle and unsuccessful -# WORKDIR /kb/module -#RUN \ -# curl -s https://codeload.github.com/matsen/pplacer/tar.gz/v1.1.alpha19 > pplacer-1.1.alpha19.tar.gz && \ -# tar -xvzf pplacer-1.1.alpha19.tar.gz && \ -# ln -s pplacer-1.1.alpha19 pplacer && \ -# rm -f pplacer-1.1.alpha19.tar && \ -# cd pplacer && \ -# cat opam-requirements.txt | xargs opam install -y && \ -# make all WORKDIR /kb/module RUN \ wget https://github.com/matsen/pplacer/releases/download/v1.1.alpha19/pplacer-linux-v1.1.alpha19.zip && \ - unzip pplacer-linux-v1.1.alpha19.zip && \ + unzip pplacer-linux-v1.1.alpha19.zip && \ ln -s pplacer-Linux-v1.1.alpha19 pplacer && \ rm -f pplacer-linux-v1.1.alpha19.zip && \ rm -f pplacer-1.1.alpha19.tar.gz && \ @@ -145,30 +51,23 @@ ENV PATH "$PATH:/kb/deployment/bin/pplacer" # Install CheckM (collected packages: checkm-genome, pysam, dendropy, ScreamingBackpack) -# Until seeing "Successfully installed ScreamingBackpack-0.2.333 checkm-genome-1.0.8 dendropy-4.2.0 pysam-0.10.0" WORKDIR /kb/module # Pysam installation failing with pip, but working with pip3 # pip install pysam \ RUN \ pip install pysam numpy \ - && pip install checkm-genome==1.0.18 \ - && cp -R /usr/local/bin/checkm /kb/deployment/bin/CheckMBin -#RUN pip3 install pysam \ -# && pip3 install dendropy \ -# && pip3 install checkm-genome==1.0.18 -#RUN pip install ScreamingBackpack -#RUN cp -R /usr/local/bin/checkm /kb/deployment/bin/CheckMBin + && pip install checkm-genome==1.1.2 \ + && cp -R /miniconda/bin/checkm /kb/deployment/bin/CheckMBin # For checkm-genome required data RUN \ - mkdir /data && \ - mv /usr/local/lib/python2.7/dist-packages/checkm/DATA_CONFIG /usr/local/lib/python2.7/dist-packages/checkm/DATA_CONFIG.orig && \ - touch /data/DATA_CONFIG && \ - cp /usr/local/lib/python2.7/dist-packages/checkm/DATA_CONFIG.orig /data/DATA_CONFIG && \ - ln -sf /data/DATA_CONFIG /usr/local/lib/python2.7/dist-packages/checkm/DATA_CONFIG - -RUN mkdir -p /data/checkm_data + mkdir /data && \ + mv /miniconda/lib/python3.6/site-packages/checkm/DATA_CONFIG /miniconda/lib/python3.6/site-packages/checkm/DATA_CONFIG.orig && \ + touch /data/DATA_CONFIG && \ + cp /miniconda/lib/python3.6/site-packages/checkm/DATA_CONFIG.orig /data/DATA_CONFIG && \ + ln -sf /data/DATA_CONFIG /miniconda/lib/python3.6/site-packages/checkm/DATA_CONFIG && \ + mkdir -p /data/checkm_data # ----------------------------------------- COPY ./ /kb/module diff --git a/lib/kb_Msuite/Utils/DataStagingUtils.py b/lib/kb_Msuite/Utils/DataStagingUtils.py index c9acd37..8165369 100644 --- a/lib/kb_Msuite/Utils/DataStagingUtils.py +++ b/lib/kb_Msuite/Utils/DataStagingUtils.py @@ -92,7 +92,7 @@ def stage_input(self, input_ref, fasta_file_extension): # AssemblySet # elif type_name == 'KBaseSets.AssemblySet': - + # read assemblySet try: assemblySet_obj = setAPI_Client.get_assembly_set_v1 ({'ref':input_ref, 'include_item_info':1}) @@ -100,7 +100,7 @@ def stage_input(self, input_ref, fasta_file_extension): raise ValueError('Unable to get object from workspace: (' + input_ref +')' + str(e)) assembly_refs = [] assembly_names = [] - for assembly_item in assemblySet_obj['data']['items']: + for assembly_item in assemblySet_obj['data']['items']: this_assembly_ref = assembly_item['ref'] # assembly obj info try: @@ -109,7 +109,7 @@ def stage_input(self, input_ref, fasta_file_extension): except Exception as e: raise ValueError('Unable to get object from workspace: (' + this_assembly_ref +'): ' + str(e)) assembly_refs.append(this_assembly_ref) - assembly_names.append(this_assembly_name) + assembly_names.append(this_assembly_name) # create file data (name for file is what's reported in results) for ass_i,assembly_ref in enumerate(assembly_refs): @@ -221,7 +221,7 @@ def fasta_seq_len_at_least(self, fasta_path, min_fasta_len=1): counts the number of non-header, non-whitespace characters in a FASTA file ''' seq_len = 0 - with open (fasta_path, 'r', 0) as fasta_handle: + with open (fasta_path, 'r') as fasta_handle: for line in fasta_handle: line = line.strip() if line.startswith('>'): @@ -354,7 +354,7 @@ def build_bin_summary_file_from_binnedcontigs_obj(self, input_ref, bin_dir, bin_ for bin_item in binned_contig_obj['bins']: #print ("BIN_ITEM[bid]: "+bin_item['bid']) # DEBUG bin_ID = re.sub ('^[^\.]+\.', '', bin_item['bid'].replace('.'+fasta_extension,'')) - + #print ("BIN_ID: "+bin_ID) # DEBUG bin_summary_info[bin_ID] = { 'n_contigs': bin_item['n_contigs'], 'gc': round (100.0 * float(bin_item['gc']), 1), diff --git a/lib/kb_Msuite/Utils/simple_run_checkm.py b/lib/kb_Msuite/Utils/simple_run_checkm.py index 0652852..e12438e 100644 --- a/lib/kb_Msuite/Utils/simple_run_checkm.py +++ b/lib/kb_Msuite/Utils/simple_run_checkm.py @@ -14,7 +14,7 @@ def run_checkm(input_dir, output_dir, log_path, options={}): options - optional - dictionary of lineage_wf options """ args = ['checkm', 'lineage_wf', input_dir, output_dir] - for opt, val in (options or {}).iteritems(): + for opt, val in (options or {}).items(): args.append(opt) if val: args.append(str(val)) @@ -22,4 +22,4 @@ def run_checkm(input_dir, output_dir, log_path, options={}): proc = Popen(args, stdout=PIPE, stderr=STDOUT) with proc.stdout, open(log_path, 'w') as logfile: for line in iter(proc.stdout.readline, b''): - logfile.write(line) + logfile.write(line.decode("utf-8")) diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index 68434ef..9171655 100644 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -17,7 +17,7 @@ elif [ "${1}" = "async" ] ; then sh ./scripts/run_async.sh elif [ "${1}" = "init" ] ; then echo "Initialize module" - cp /miniconda/lib/python2.7/site-packages/checkm/DATA_CONFIG.orig /data/DATA_CONFIG + cp /miniconda/lib/python3.6/site-packages/checkm/DATA_CONFIG.orig /data/DATA_CONFIG mkdir /data/checkm_data cd /data/checkm_data echo "downloading: https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_2015_01_16.tar.gz" diff --git a/scripts/prepare_deploy_cfg.py b/scripts/prepare_deploy_cfg.py index e3743ec..0b35473 100644 --- a/scripts/prepare_deploy_cfg.py +++ b/scripts/prepare_deploy_cfg.py @@ -2,8 +2,8 @@ import os import os.path from jinja2 import Template -from ConfigParser import ConfigParser -import StringIO +from configparser import ConfigParser +from io import StringIO if __name__ == "__main__": if len(sys.argv) != 3: