Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 13 additions & 114 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,97 +1,15 @@
FROM kbase/kbase:sdkbase2.latest
FROM kbase/sdkbase2:python
MAINTAINER KBase Developer
# -----------------------------------------
# In this section, you can install any system dependencies required
# to run your App. For instance, you could place an apt-get update or
# install line here, a git checkout to download code, or run any other
# installation scripts.

## upgrade python3 to v3.7
#RUN apt update
#RUN echo "Y" | apt install software-properties-common
#RUN add-apt-repository ppa:deadsnakes/ppa
#RUN apt update
#RUN echo "Y" | apt install python3.7


# update package managers
#RUN apt-get update
#RUN apt-get update && apt-get install -y python3-pip
#RUN apt-get update && apt-get install -y python=3.6 python3-pip
#RUN pip install --upgrade pip


# install cython that pysam likes
RUN apt-get update && apt-get install -y build-essential
RUN apt-get update && apt-get install -y build-essential wget
RUN pip install --upgrade pip setuptools Cython==0.25



# Here we install a python coverage tool and an
# https library that is out of date in the base image.

#RUN pip install coverage

# Attempted workaround for the install error "Could not find .egg-info directory in install record for checkm-genome, etc."
#RUN pip install --upgrade setuptools

## update security libraries in the base image
#RUN pip install cffi --upgrade \
# && pip install pyopenssl --upgrade \
# && pip install ndg-httpsclient --upgrade \
# && pip install pyasn1 --upgrade \
# && pip install requests --upgrade \
# && pip install 'requests[security]' --upgrade



###### CheckM installation
# Directions from https://github.com/Ecogenomics/CheckM/wiki/Installation#how-to-install-checkm
#System requirements
#
#CheckM is designed to run on Linux. The limiting requirement for CheckM is memory. Inference of lineage-specific marker sets using the full reference genome tree requires approximately 40 GB of memory. However, a reduced genome tree (--reduced_tree) can also be used to infer lineage-specific marker sets, which is suitable for machines with as little as 16 GB of memory. We recommend using the full tree if possible, though our results suggest that the same lineage-specific marker set will be selected for the vast majority of genomes regardless of the underlying reference tree. System requirements are far more modest if you plan to make use of taxonomic-specific marker sets or your own custom marker genes as this bypasses the need to place genomes in the reference genome tree.
#
#How to install CheckM
#
#CheckM requires the following programs to be added to your system path:
#
#HMMER (>=3.1b1)
#prodigal (2.60 or >=2.6.1)
#executable must be named prodigal and not prodigal.linux
#pplacer (>=1.1)
#guppy, which is part of the pplacer package, must also be on your system path
#pplacer binaries can be found on the pplacer GitHub page
#CheckM is a Python 2.x program and we recommend installing it through pip:
#
#> sudo pip install numpy
#> sudo pip install checkm-genome
#
#This will install CheckM and all other required Python libraries.
#
#CheckM relies on a number of precalculated data files. To install these run:
#
#> sudo checkm data update
#
#This will prompt you for an installation directory for the required data files. You can update the data files in the future by re-running this command. If you are unable to automatically download these files (e.g., you are behind a proxy), the files can be manually downloaded from https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_v1.0.7.tar.gz. Decompress this file to an appropriate folder and run checkm data setRoot <data_directory> to inform CheckM of where the files have been placed.
#
#CheckM is now ready to run. For a list of CheckM commands type:
#
#> checkm
#
#If desired, you can also download the latest release of CheckM and install it manually. CheckM makes use of the following Python libraries:
#
#python >= 2.7 and < 3.0
#numpy >= 1.8.0
#scipy >= 0.9.0
#matplotlib >= 1.3.1
#pysam >= 0.8.3
#dendropy >= 4.0.0
#ScreamingBackpack >= 0.2.3

#
#### OK, got that cleared up. Now install CheckM, but not data
#
# Directions available here: https://github.com/Ecogenomics/CheckM/wiki/Installation#how-to-install-checkm

# Now install CheckM, but not data

# Install HMMER
WORKDIR /kb/module
Expand Down Expand Up @@ -119,23 +37,11 @@ RUN \

#ENV PATH "$PATH:/kb/development/bin/prodigal"

# Install Pplacer
# NOTE: The commented-out source build below is replaced by the release-zip download that
# follows it, because installing opam and its dependencies proved to be a hassle and was unsuccessful
# WORKDIR /kb/module
#RUN \
# curl -s https://codeload.github.com/matsen/pplacer/tar.gz/v1.1.alpha19 > pplacer-1.1.alpha19.tar.gz && \
# tar -xvzf pplacer-1.1.alpha19.tar.gz && \
# ln -s pplacer-1.1.alpha19 pplacer && \
# rm -f pplacer-1.1.alpha19.tar && \
# cd pplacer && \
# cat opam-requirements.txt | xargs opam install -y && \
# make all

WORKDIR /kb/module
RUN \
wget https://github.com/matsen/pplacer/releases/download/v1.1.alpha19/pplacer-linux-v1.1.alpha19.zip && \
unzip pplacer-linux-v1.1.alpha19.zip && \
unzip pplacer-linux-v1.1.alpha19.zip && \
ln -s pplacer-Linux-v1.1.alpha19 pplacer && \
rm -f pplacer-linux-v1.1.alpha19.zip && \
rm -f pplacer-1.1.alpha19.tar.gz && \
Expand All @@ -145,30 +51,23 @@ ENV PATH "$PATH:/kb/deployment/bin/pplacer"


# Install CheckM (collected packages: checkm-genome, pysam, dendropy, ScreamingBackpack)
# Expected output on success: "Successfully installed ScreamingBackpack-0.2.333 checkm-genome-1.0.8 dendropy-4.2.0 pysam-0.10.0"
WORKDIR /kb/module

# Pysam installation failing with pip, but working with pip3
# pip install pysam \
RUN \
pip install pysam numpy \
&& pip install checkm-genome==1.0.18 \
&& cp -R /usr/local/bin/checkm /kb/deployment/bin/CheckMBin
#RUN pip3 install pysam \
# && pip3 install dendropy \
# && pip3 install checkm-genome==1.0.18
#RUN pip install ScreamingBackpack
#RUN cp -R /usr/local/bin/checkm /kb/deployment/bin/CheckMBin
&& pip install checkm-genome==1.1.2 \
&& cp -R /miniconda/bin/checkm /kb/deployment/bin/CheckMBin

# For checkm-genome required data
RUN \
mkdir /data && \
mv /usr/local/lib/python2.7/dist-packages/checkm/DATA_CONFIG /usr/local/lib/python2.7/dist-packages/checkm/DATA_CONFIG.orig && \
touch /data/DATA_CONFIG && \
cp /usr/local/lib/python2.7/dist-packages/checkm/DATA_CONFIG.orig /data/DATA_CONFIG && \
ln -sf /data/DATA_CONFIG /usr/local/lib/python2.7/dist-packages/checkm/DATA_CONFIG

RUN mkdir -p /data/checkm_data
mkdir /data && \
mv /miniconda/lib/python3.6/site-packages/checkm/DATA_CONFIG /miniconda/lib/python3.6/site-packages/checkm/DATA_CONFIG.orig && \
touch /data/DATA_CONFIG && \
cp /miniconda/lib/python3.6/site-packages/checkm/DATA_CONFIG.orig /data/DATA_CONFIG && \
ln -sf /data/DATA_CONFIG /miniconda/lib/python3.6/site-packages/checkm/DATA_CONFIG && \
mkdir -p /data/checkm_data

# -----------------------------------------
COPY ./ /kb/module
Expand Down
10 changes: 5 additions & 5 deletions lib/kb_Msuite/Utils/DataStagingUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,15 @@ def stage_input(self, input_ref, fasta_file_extension):
# AssemblySet
#
elif type_name == 'KBaseSets.AssemblySet':

# read assemblySet
try:
assemblySet_obj = setAPI_Client.get_assembly_set_v1 ({'ref':input_ref, 'include_item_info':1})
except Exception as e:
raise ValueError('Unable to get object from workspace: (' + input_ref +')' + str(e))
assembly_refs = []
assembly_names = []
for assembly_item in assemblySet_obj['data']['items']:
for assembly_item in assemblySet_obj['data']['items']:
this_assembly_ref = assembly_item['ref']
# assembly obj info
try:
Expand All @@ -109,7 +109,7 @@ def stage_input(self, input_ref, fasta_file_extension):
except Exception as e:
raise ValueError('Unable to get object from workspace: (' + this_assembly_ref +'): ' + str(e))
assembly_refs.append(this_assembly_ref)
assembly_names.append(this_assembly_name)
assembly_names.append(this_assembly_name)

# create file data (name for file is what's reported in results)
for ass_i,assembly_ref in enumerate(assembly_refs):
Expand Down Expand Up @@ -221,7 +221,7 @@ def fasta_seq_len_at_least(self, fasta_path, min_fasta_len=1):
counts the number of non-header, non-whitespace characters in a FASTA file
'''
seq_len = 0
with open (fasta_path, 'r', 0) as fasta_handle:
with open (fasta_path, 'r') as fasta_handle:
for line in fasta_handle:
line = line.strip()
if line.startswith('>'):
Expand Down Expand Up @@ -354,7 +354,7 @@ def build_bin_summary_file_from_binnedcontigs_obj(self, input_ref, bin_dir, bin_
for bin_item in binned_contig_obj['bins']:
#print ("BIN_ITEM[bid]: "+bin_item['bid']) # DEBUG
bin_ID = re.sub ('^[^\.]+\.', '', bin_item['bid'].replace('.'+fasta_extension,''))

#print ("BIN_ID: "+bin_ID) # DEBUG
bin_summary_info[bin_ID] = { 'n_contigs': bin_item['n_contigs'],
'gc': round (100.0 * float(bin_item['gc']), 1),
Expand Down
4 changes: 2 additions & 2 deletions lib/kb_Msuite/Utils/simple_run_checkm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ def run_checkm(input_dir, output_dir, log_path, options={}):
options - optional - dictionary of lineage_wf options
"""
args = ['checkm', 'lineage_wf', input_dir, output_dir]
for opt, val in (options or {}).iteritems():
for opt, val in (options or {}).items():
args.append(opt)
if val:
args.append(str(val))
print('Running: ' + ' '.join(args))
proc = Popen(args, stdout=PIPE, stderr=STDOUT)
with proc.stdout, open(log_path, 'w') as logfile:
for line in iter(proc.stdout.readline, b''):
logfile.write(line)
logfile.write(line.decode("utf-8"))
2 changes: 1 addition & 1 deletion scripts/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ elif [ "${1}" = "async" ] ; then
sh ./scripts/run_async.sh
elif [ "${1}" = "init" ] ; then
echo "Initialize module"
cp /miniconda/lib/python2.7/site-packages/checkm/DATA_CONFIG.orig /data/DATA_CONFIG
cp /miniconda/lib/python3.6/site-packages/checkm/DATA_CONFIG.orig /data/DATA_CONFIG
mkdir /data/checkm_data
cd /data/checkm_data
echo "downloading: https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_2015_01_16.tar.gz"
Expand Down
4 changes: 2 additions & 2 deletions scripts/prepare_deploy_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import os
import os.path
from jinja2 import Template
from ConfigParser import ConfigParser
import StringIO
from configparser import ConfigParser
from io import StringIO

if __name__ == "__main__":
if len(sys.argv) != 3:
Expand Down