diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index ce220d3..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-language: python
-python:
- - 2.7
-before_install:
- # We do this to make sure we get dependencies
- - sudo apt-get update -qq
-install:
- - pip install -r requirements.txt --use-mirrors
- - pip install .
-notifications:
- email: true
-script:
- # cd tests && sh TEST.sh
- - echo 'Not testing ATM'
diff --git a/contiguity/Contiguity.py b/Contiguity.py
similarity index 98%
rename from contiguity/Contiguity.py
rename to Contiguity.py
index e6586d2..ace3496 100755
--- a/contiguity/Contiguity.py
+++ b/Contiguity.py
@@ -24,7 +24,7 @@
__title__ = 'Contiguity'
-__version__ = '1.0.3'
+__version__ = '1.0.4'
__description__ = "Tool for visualising assemblies"
__author__ = 'Mitchell Sullivan'
__license__ = 'GPLv3'
@@ -2141,7 +2141,7 @@ def clear_all(self):
# load an assembly from a variety of files
def load_assembly(self):
- filename = tkFileDialog.askopenfilename()
+        filename = tkFileDialog.askopenfilename(title='Select assembly graph file')
if filename == '' or filename == ():
return
self.clear_all()
@@ -2176,10 +2176,76 @@ def load_assembly(self):
elif what.startswith('#FASTG'):
self.load_fastg()
self.update_console('FASTG loaded.')
+ elif what.startswith('H\tVN:Z:'):
+ self.load_gfa()
+ self.update_console('GFA loaded.')
else:
tkMessageBox.showerror('Invalid format', 'Contiguity cannot recognise file type.')
self.writeWorkCont()
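+    # Load a GFA assembly graph (spec: https://github.com/pmelsted/GFA-spec).
+    # 'S' records define segments (contigs); a sequence of '*' means the
+    # sequences are stored in a separate FASTA file, which the user is
+    # prompted for. 'L' (link) and 'C' (containment) records define joins;
+    # the CIGAR in their sixth field is summed over M/D/N/H/P operations to
+    # get the overlap length in bases.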
+ def load_gfa(self):
+ seqdict = {}
+ with open(self.csagfile.get()) as gfa:
+ for line in gfa:
+ if line.startswith('S\t'):
+ sname = line.split()[1]
+ seq = line.split()[2]
+ if seq == '*' and seqdict == {}:
+                        fasta = tkFileDialog.askopenfilename(title='Select FASTA file')
+                        with open(fasta) as f:
+                            first = True
+                            # use separate variable names here so the S-record's
+                            # seq (still '*') is not clobbered while reading FASTA
+                            for faline in f:
+                                if faline.startswith('>'):
+                                    if first:
+                                        first = False
+                                    else:
+                                        seqdict[faname] = faseq
+                                    faname = faline[1:].split()[0]
+                                    faseq = ''
+                                else:
+                                    faseq += faline.rstrip()
+                            seqdict[faname] = faseq
+ if seq == '*':
+ seq = seqdict[sname]
+ aninstance = contig(sname, sname, seq)
+ self.contigDict[sname] = aninstance
+ if line.startswith('L\t') or line.startswith('C\t'):
+ splitline = line.split()
+ if splitline[2] == '+':
+ dira = True
+ else:
+ dira = False
+ if splitline[4] == '+':
+ dirb = True
+ else:
+ dirb = False
+ cigar = splitline[5]
+ intstring = ''
+ overlap = 0
+ for i in cigar:
+ if i.isdigit():
+ intstring += i
+ else:
+ if i in ['M', 'D', 'N', 'H', 'P']:
+ overlap += int(intstring)
+ intstring = ''
+ self.edgelist.append((splitline[1], dira, splitline[3], dirb, overlap))
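+        # Translate each (contig_a, dir_a, contig_b, dir_b, overlap) edge into
+        # the to/fr adjacency lists used elsewhere in Contiguity: 'to' edges
+        # leave the end of a contig, 'fr' edges leave its start.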
+ for i in self.edgelist:
+ contiga, dira, contigb, dirb, overlap = i
+ if dira and dirb:
+ self.contigDict[contiga].to.append((contigb, True, overlap))
+ self.contigDict[contigb].fr.append((contiga, False, overlap))
+ elif dira and not dirb:
+ self.contigDict[contiga].to.append((contigb, False, overlap))
+ self.contigDict[contigb].to.append((contiga, False, overlap))
+ elif not dira and dirb:
+ self.contigDict[contiga].fr.append((contigb, True, overlap))
+ self.contigDict[contigb].fr.append((contiga, True, overlap))
+ else:
+ self.contigDict[contiga].fr.append((contigb, False, overlap))
+ self.contigDict[contigb].to.append((contiga, True, overlap))
+
+
# load ace file
def load_ace(self):
ace = open(self.csagfile.get())
@@ -2760,7 +2826,7 @@ def ok_edges(self):
global khmer
import khmer
except ImportError:
- proceed_no_khmer = tkMessageBox.askyesno('Khmer not found', 'Proceed without installing Khmer (Not recommended)?')
+            proceed_no_khmer = tkMessageBox.askyesno('Khmer not found', 'Proceed without installing Khmer? (Not recommended - see manual.)')
if proceed_no_khmer:
args.khmer = False
else:
@@ -3437,26 +3503,15 @@ def get_long_edge(self):
self.edgelist.append((i[1:], True, j[1:], False, 'nnnnnnnnn'))
def get_nmer_freq_khmer(self):
- nmersize, reads, ht_size, ht_n, n_threads = self.nmersize.get(), self.readfile.get(), self.ht_size.get(), self.ht_number.get(), self.num_threads.get()
- n_threads = 1
+ nmersize, reads, ht_size, ht_n = self.nmersize.get(), self.readfile.get(), self.ht_size.get(), self.ht_number.get()
ht_size = float('2e9')
ht_n = 4
bigcount = True
- self.ht = khmer.new_counting_hash(nmersize, ht_size, ht_n, n_threads) # HT_size, number ht, threads
+        self.ht = khmer.new_counting_hash(nmersize, ht_size, ht_n)  # k-mer size, hash table size, number of tables
self.ht.set_use_bigcount(bigcount)
- rparser = khmer.ReadParser(reads, n_threads)
- threads = []
+ rparser = khmer.ReadParser(reads)
self.queue.put('consuming input ' + reads)
- for tnum in xrange(n_threads):
- t = \
- threading.Thread(
- target=self.ht.consume_fasta_with_reads_parser,
- args=(rparser, )
- )
- threads.append(t)
- t.start()
- for t in threads:
- t.join()
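+        # consume all reads in a single pass to build the k-mer count table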
+ self.ht.consume_fasta_with_reads_parser(rparser)
fp_rate = khmer.calc_expected_collisions(self.ht)
self.queue.put('fp rate estimated to be %1.3f' % fp_rate)
if fp_rate > 0.20:
@@ -4875,22 +4930,32 @@ def writeMultiFasta(self):
parser = argparse.ArgumentParser(prog='Contiguity', formatter_class=argparse.RawDescriptionHelpFormatter, description='''
Contiguity.py: A pairwise comparison and contig adjacency graph exploration tool.
-USAGE: Contiguity.py -cl -c -fq -o
+Version: 1.0.4
+License: GPLv3
-REQUIREMENTS: With default settings Contigutiy requires at least 8gb of free memory (RAM)
+USAGE: Contiguity.py -cl -co <contig file> -rf <read file> -o <output folder>
contig file: FASTA file of contigs or scaffolds
read file: Interleaved fastq file - read1_left, read1_right, read2_left etc... orientated as such --> <--
output folder: folder to put output files in, can and will overwrite files in this folder, will create folder if folder doesn't exist
-Only other option to keep in mind is -rl if the read length is not 101bp
+REQUIREMENTS: With default settings Contiguity requires at least 6GB of free memory (RAM),
+potentially more. This is because Contiguity uses khmer to create a De Bruijn graph.
+
+If you are running into memory issues (e.g. the process is being killed by the operating
+system with a "Killed: 9" message) you may want to reduce the hash table number or hash
+table size (using the flags -ht_n and -ht_s), or free up more memory. Large datasets may
+need more memory. Please read http://khmer.readthedocs.org/en/v1.1/choosing-table-sizes.html
+for more information about hash tables.
+
+The only other option to keep in mind is -rl: if the read length is not 101bp, you may want
+to increase this value for longer reads or decrease it for shorter reads (75% of the maximum
+read length seems to work well).
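+
+For example, a typical command-line run (file names here are hypothetical):
+Contiguity.py -cl -co contigs.fa -rf reads.fq -o contiguity_out -ht_s 1e9 -ht_n 4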
''', epilog="Thanks for using Contiguity")
parser.add_argument('-co', '--contig_file', action='store', help='fasta file of assembled contigs or scaffolds')
parser.add_argument('-rf', '--read_file', action='store', help='read file')
parser.add_argument('-o', '--output_folder', action='store', help='output folder')
parser.add_argument('-k', '--kmer_size', action='store', type=int, default=31, help='k-mer size for finding adjacent contigs [31]')
-parser.add_argument('-max_d', '--max_distance', action='store', type=int, default=300, help='maximum distance apart in the de bruijn graph for contigs to count as adjacent [300]')
+parser.add_argument('-max_d', '--max_distance', action='store', type=int, default=200, help='maximum distance apart in the de bruijn graph for contigs to count as adjacent [200]')
parser.add_argument('-kmer_a', '--kmer_average', action='store', type=int, default=-1, help='All k-mers above half this value will be traversed [auto]')
parser.add_argument('-kmer_c', '--kmer_cutoff', action='store', type=int, default=-1, help='cutoff for k-mer values [auto]')
parser.add_argument('-ov', '--overlap', action='store', type=int, default=None, help='minimum overlap to create edge [kmer_size-1]')
@@ -4904,15 +4969,17 @@ def writeMultiFasta(self):
parser.add_argument('-nd', '--no_db_edges', action='store_true', default=False, help='Don\'t get De Bruijn edges')
parser.add_argument('-np', '--no_paired_edges', action='store_true', default=False, help='Don\'t get paired-end edges')
parser.add_argument('-km', '--khmer', action='store_false', default=True, help='Don\'t use khmer for De Bruijn graph contruction (not recommended)')
-parser.add_argument('-nt', '--num_threads', action='store', type=int, default=1, help='Number of threads to use for hash table building with khmer and for mapping reads with bowtie')
-parser.add_argument('-ht_s', '--ht_size', action='store', default='2e9', help='Hash table size, for more information check http://khmer.readthedocs.org/en/v1.1/choosing-table-sizes.html')
-parser.add_argument('-ht_n', '--ht_number', action='store', type=int, default=4, help='Hash table number, for more information check http://khmer.readthedocs.org/en/v1.1/choosing-table-sizes.html')
+parser.add_argument('-nt', '--num_threads', action='store', type=int, default=1, help='Number of threads to use for mapping reads with bowtie [1]')
+parser.add_argument('-ht_s', '--ht_size', action='store', default='1e9', help='Hash table size [1e9].')
+parser.add_argument('-ht_n', '--ht_number', action='store', type=int, default=4, help='Number of hash tables [4].')
args = parser.parse_args()
if args.command_line:
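+    # khmer is assumed not to be available on Windows; fall back to the
+    # built-in De Bruijn graph construction there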
+ if platform.system() == 'Windows':
+ args.khmer = False
if args.khmer:
import khmer
if args.contig_file is None or args.read_file is None or args.output_folder is None:
@@ -4937,4 +5004,4 @@ def writeMultiFasta(self):
root.option_add("*Scrollbar.Background", "#C0C0FF")
root.option_add("*Entry.Background", "#FFFFFF")
app = App(root)
- root.mainloop()
+ root.mainloop()
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index a594fd6..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,4 +0,0 @@
-include LICENSE
-include requirements.txt
-include README.rst
-recursive-include docs/_build/html *
diff --git a/README.rst b/README.rst
index 20af3aa..1136ffe 100644
--- a/README.rst
+++ b/README.rst
@@ -5,121 +5,13 @@ Contiguity is a tool for constructing and visualising assembly graphs.
It uses a linear layout so that the assembly graph can be directly compared
to a reference.
+The main website for Contiguity can be found at http://mjsull.github.io/Contiguity
-.. image:: https://pypip.in/version/Contiguity/badge.svg
- :target: https://pypi.python.org/pypi/Contiguity/
- :alt: Latest Version
-
-.. image:: https://pypip.in/download/Contiguity/badge.svg
- :target: https://pypi.python.org/pypi/Contiguity/
- :alt: Downloads
-
-.. image:: https://travis-ci.org/BeatsonLab-MicrobialGenomics/Contiguity.svg?branch=master
- :target: https://travis-ci.org/BeatsonLab-MicrobialGenomics/Contiguity
- :alt: Build status
-
-
-.. image:: https://github.com/BeatsonLab-MicrobialGenomics/Contiguity/blob/master/docs/manual/Contiguity_SS.png
- :alt: Contiguity Screen shot
- :align: center
-
-
-Requirements:
- * Python 2.7+
- * NCBI-BLAST+ (needed for overlap edge creation and automatic comparison
- generation)
- * Bowtie 2 (needed for paired end edge creation)
-
-
-Installation
-------------
-
-If you're not familiar with the command-line we recommend you ask local IT
-support to help you install Contiguity.
-
-
-Checking requirements are installed
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-You will need to install/have installed:
- * ncbiblast+ >= 2.2.28
- * python >= 2.7 (**Python 3 is not supported**)
- * bowtie2 >= 2.1.0
-
-You can check these are installed by::
-
- $ python --version
- $ blastn -version
- $ bowtie2 --version
-
-Installation of python, blastn or bowtie2 (without a package manager) is
-beyond the scope of this document.
-
-If you have both python, blastn and bowtie2 you need to (if not already
-present) install pip_.
-
-You can check if pip_ exists with::
-
- $ which pip
-
-If you get a "not found", please read the `pip installation instructions`_.
-
-**If you already have pip we do suggest you upgrade it.** We are using version
-1.5.6 at the time of writing this document.
-
-You can upgrade pip_ like this::
-
- $ pip install --upgrade pip
-
-
-pip based installation of Contiguity
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-If you have root/admin something like::
-
- $ pip install Contiguity
-
-Otherwise (not root/admin or permission denied errors running above)::
-
- $ pip install --user Contiguity
-
-If you installed using the --user option of pip_, Contiguity will typically
-end up in: /home/$USER/.local/bin/
-You need to add this location to you ~/.bash_profile.
-
-Add Contiguity to your path::
-
- $ echo 'export PATH=$PATH:/home/$USER/.local/bin/' >> ~/.bash_profile
- $ source !$
-
-
-Testing the installation of Contiguity
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Run (in the Terminal)::
-
- $ Contiguity
-
-
-Upgrading Contiguity
-~~~~~~~~~~~~~~~~~~~~
-
-You can upgrade like this::
-
- pip install --upgrade Contiguity
-
-
-**Please regularly check back to make sure you're running the most recent
-Contiguity version.**
-
-
-Usage/Docs
-----------
+Contiguity can be downloaded from http://mjsull.github.io/Contiguity/files.html
For detailed information on how to use Contiguity please see the manual_
otherwise see Quick Start below.
-
Quick Start
-----------
@@ -127,7 +19,7 @@ Supported formats & the CAG
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Contiguity works with ABySS_ (**.dot**), Velvet_ (**LastGraph**), Newbler_
-(**.ace**) and SPAdes_ (**FASTG**) formats.
+(**.ace**), SPAdes_ (**FASTG**) and GFA_ (**.gfa**) formats.
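+
+Contiguity recognises a GFA file by its header line, for example::
+
+    H	VN:Z:1.0
+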
For all other assemblies, an assembly graph (.cag) can be created from the
Contiguity GUI (file->create cag file) or using the command line.
@@ -145,6 +37,8 @@ You can generate a CAG from the command line like::
$ Contiguity -cl -c -fq -o
+Or by selecting File > Create CAG in the GUI and providing a read file and a contig file.
+
This assumes:
* (~8GB of free memory)
* contig_file.fa: is in FASTA file of contigs or scaffolds
@@ -188,16 +82,16 @@ Citation
If you use Contiguity in your work, please cite it using::
- Mitchell J Sullivan, Nouri Ben Zakour, Brian Forde, Mitchell Stanton-Cook & Scott A Beatson*
- Contiguity: Contig adjacency graph construction and visualisation
- https://github.com/BeatsonLab-MicrobialGenomics/Contiguity
+ Sullivan MJ, Ben Zakour NL, Forde BM, Stanton-Cook M, Beatson SA. (2015)
+ Contiguity: Contig adjacency graph construction and visualisation.
+ PeerJ PrePrints 3:e1273 https://dx.doi.org/10.7287/peerj.preprints.1037v1
-.. _manual: https://github.com/BeatsonLab-MicrobialGenomics/Contiguity/raw/master/docs/manual/Contiguity_manual.pdf
-.. _pip: http://www.pip-installer.org/en/latest/
+.. _manual: https://github.com/mjsull/Contiguity/wiki
.. _pip installation instructions: http://pip.readthedocs.org/en/latest/installing.html
.. _ABySS: http://www.bcgsc.ca/platform/bioinfo/software/abyss
.. _Velvet: https://www.ebi.ac.uk/~zerbino/velvet/
.. _Newbler: http://www.454.com/products/analysis-software/
.. _SPAdes: http://bioinf.spbau.ru/spades
+.. _GFA: https://github.com/pmelsted/GFA-spec
diff --git a/contiguity/Contiguity b/contiguity/Contiguity
deleted file mode 120000
index 040774c..0000000
--- a/contiguity/Contiguity
+++ /dev/null
@@ -1 +0,0 @@
-Contiguity.py
\ No newline at end of file
diff --git a/contiguity/__init__.py b/contiguity/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/contiguity/util/__init__.py b/contiguity/util/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/contiguity/util/checkCSAG.py b/contiguity/util/checkCSAG.py
deleted file mode 100644
index 5c06cca..0000000
--- a/contiguity/util/checkCSAG.py
+++ /dev/null
@@ -1,241 +0,0 @@
-# Contiguity - Tool for visualising assemblies
-# Copyright (C) 2013-2015 Mitchell Sullivan
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-#
-# Mitchell Sullivan
-# mjsull@gmail.com
-# School of Chemistry & Molecular Biosciences
-# The University of Queensland
-# Brisbane, QLD 4072.
-# Australia
-
-
-import sys, subprocess, string
-
-transtab = string.maketrans('atcgATCG', 'tagcTAGC')
-theident = 85
-
-
-class contig:
- def __init__(self, name, forseq, revseq):
- self.name = name
- self.forseq = forseq
- self.revseq = revseq
- self.to = []
- self.fr = []
- self.length = len(forseq)
-
-contigfile = open(sys.argv[1])
-getline = 0
-contigDict = {}
-edgelist = []
-templg = open('templg.fa', 'w')
-bignodes = set()
-count = 0
-first = True
-temped = open('tempedge.fa', 'w')
-for line in contigfile:
- if line.startswith('NODE'):
- name = line.split()[1]
- seq = line.split()[-1]
- revseq = seq[::-1].translate(transtab)
- aninstance = contig(name, seq, revseq)
- contigDict[name] = aninstance
- templg.write('>' + name + '\n' + seq + '\n')
- elif line.startswith('EDGE'):
- junk, a, dira, b, dirb, overlap = line.split()
- if overlap == '.':
- overlap = ''
- if not 'n' in overlap:
- temped.write('>' + str(count) + '\n')
- if dira == 'True':
- if dirb == 'True':
- if overlap.isdigit():
- contigDict[a].to.append((b, True, int(overlap)))
- contigDict[b].fr.append((a, False, int(overlap)))
- temped.write(contigDict[a].forseq + contigDict[b].forseq[int(overlap):] + '\n')
- edgelist.append((a, b, len(contigDict[a].forseq + contigDict[b].forseq[int(overlap):])))
- else:
- contigDict[a].to.append((b, True, overlap))
- contigDict[b].fr.append((a, False, overlap[::-1].translate(transtab)))
- temped.write(contigDict[a].forseq + overlap + contigDict[b].forseq + '\n')
- edgelist.append((a, b, len(contigDict[a].forseq + overlap + contigDict[b].forseq)))
- else:
- if overlap.isdigit():
- contigDict[a].to.append((b, False, int(overlap)))
- contigDict[b].to.append((a, False, int(overlap)))
- temped.write(contigDict[a].forseq + contigDict[b].revseq[int(overlap):] + '\n')
- edgelist.append((a, b, len(contigDict[a].forseq + contigDict[b].revseq[int(overlap):])))
- else:
- contigDict[a].to.append((b, False, overlap))
- contigDict[b].to.append((a, False, overlap[::-1].translate(transtab)))
- temped.write(contigDict[a].forseq + overlap + contigDict[b].revseq + '\n')
- edgelist.append((a, b, len(contigDict[a].forseq + overlap + contigDict[b].revseq)))
- else:
- if dirb == 'True':
- if overlap.isdigit():
- contigDict[a].fr.append((b, True, int(overlap)))
- contigDict[b].fr.append((a, True, int(overlap)))
- temped.write(contigDict[a].revseq + contigDict[b].forseq[int(overlap):] + '\n')
- edgelist.append((a, b, len(contigDict[a].revseq + contigDict[b].forseq[int(overlap):])))
- else:
- contigDict[a].fr.append((b, True, overlap))
- contigDict[b].fr.append((a, True, overlap[::-1].translate(transtab)))
- temped.write(contigDict[a].revseq + overlap + contigDict[b].forseq + '\n')
- edgelist.append((a, b, len(contigDict[a].revseq + overlap + contigDict[b].forseq)))
- else:
- if overlap.isdigit():
- contigDict[a].fr.append((b, False, int(overlap)))
- contigDict[b].to.append((a, True, int(overlap)))
- temped.write(contigDict[a].revseq + contigDict[b].revseq[int(overlap):] + '\n')
- edgelist.append((a, b, len(contigDict[a].revseq + contigDict[b].revseq[int(overlap):])))
- else:
- contigDict[a].fr.append((b, False, overlap))
- contigDict[b].to.append((a, True, overlap[::-1].translate(transtab)))
- temped.write(contigDict[a].revseq + overlap + contigDict[b].revseq + '\n')
- edgelist.append((a, b, len(contigDict[a].revseq + overlap + contigDict[b].revseq)))
- count += 1
-
-templg.close()
-temped.close()
-
-
-
-first = True
-reflist = {}
-contiglist = {}
-for i in sys.argv[2:]:
- tempref = open(i)
- for line in tempref:
- if line.startswith('>'):
- if first:
- first = False
- else:
- reflist[name] = seq
- contiglist[name] = []
- name = line[1:].split()[0]
- seq = ''
- else:
- seq += line.rstrip()
- tempref.close()
-reflist[name] = seq
-contiglist[name] = []
-
-refout = open('tempref.fa', 'w')
-reflen = {}
-for i in reflist:
- refout.write('>' + i + '\n' + reflist[i] + reflist[i] + '\n')
- reflen[i] = len(reflist[i])
-refout.close()
-
-
-subprocess.Popen('makeblastdb -dbtype nucl -out tempdb -in tempref.fa', shell=True).wait()
-subprocess.Popen('blastn -task blastn -db tempdb -outfmt 6 -query templg.fa -out query_tempdb1.out', shell=True).wait()
-subprocess.Popen('blastn -task blastn -db tempdb -outfmt 6 -query tempedge.fa -out query_tempdb2.out', shell=True).wait()
-
-bout = open('query_tempdb1.out')
-inref = set()
-
-for line in bout:
- query, subject, ident, length, mismatch, indel, qStart, qEnd, rStart, rEnd, eVal, bitScore = line.split()
- ident = float(ident)
- length = int(length)
- qStart = int(qStart)
- qEnd = int(qEnd)
- rStart = int(rStart)
- rEnd = int(rEnd)
- if ident >= theident and qStart == 1 and qEnd == contigDict[query].length:
- inref.add(query)
- contiglist[subject].append((query, min([rStart, rEnd]), max([rStart, rEnd]), rStart < rEnd, ident, length))
-bout.close()
-bout = open('query_tempdb2.out')
-testset = set()
-allconts = set(contigDict)
-edges = set()
-edgedict ={}
-
-for i in contiglist:
- hitlist = contiglist[i]
- hitlist.sort(key=lambda x: x[-2], reverse=True)
- hitlist.sort(key=lambda x: x[-1], reverse=True)
- newhitlist = []
- for j in hitlist:
- getit = True
- for k in newhitlist:
- if j[1] >= k[1] and j[2] <= k[2]:
- getit = False
- break
- if getit:
- newhitlist.append(j)
- newhitlist.sort(key=lambda x: x[1])
- hitlist = newhitlist
- lasthit = None
- for j in hitlist:
- if lasthit != None:
- if not (j[0], not j[3], lasthit[0], not lasthit[3]) in edges and j[1] < lasthit[2] + 301:
- edges.add((lasthit[0], lasthit[3], j[0], j[3]))
- edgedict[(lasthit[0], lasthit[3], j[0], j[3])] = [lasthit, j]
- lasthit = j
-
-SP = 0
-for i in edges:
- #print i
- gotit = False
- if i[1]:
- for j in contigDict[i[0]].to:
- if j[0] == i[2] and j[1] == i[3]:
- gotit = True
- break
- else:
- for j in contigDict[i[0]].fr:
- if j[0] == i[2] and j[1] == i[3]:
- gotit = True
- break
- if gotit:
- SP += 1
-
-
-
-
-
-
-edgeinref = set()
-for line in bout:
- query, subject, ident, length, mismatch, indel, qStart, qEnd, rStart, rEnd, eVal, bitScore = line.split()
- ident = float(ident)
- length = int(length)
- qStart = int(qStart)
- qEnd = int(qEnd)
- rStart = int(rStart)
- rEnd = int(rEnd)
- if ident >= theident and length >= 0.98 * edgelist[int(query)][-1]:
- edgeinref.add(int(query))
-
-
-
-TP = 0
-FP = 0
-MA = 0
-for i in range(len(edgelist)):
- if edgelist[i][0] in inref and edgelist[i][1] in inref:
- if i in edgeinref:
- TP += 1
- else:
- FP += 1
- else:
- MA += 1
-
-print TP , FP , SP, len(edges) - SP
-print TP * 100.0 / (TP+FP), '% precision'
-print SP * 100.0 / len(edges), '% sensitivity'
diff --git a/contiguity/util/checkLG.py b/contiguity/util/checkLG.py
deleted file mode 100644
index 302eb0f..0000000
--- a/contiguity/util/checkLG.py
+++ /dev/null
@@ -1,279 +0,0 @@
-# Contiguity - Tool for visualising assemblies
-# Copyright (C) 2013-2015 Mitchell Sullivan
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-#
-# Mitchell Sullivan
-# mjsull@gmail.com
-# School of Chemistry & Molecular Biosciences
-# The University of Queensland
-# Brisbane, QLD 4072.
-# Australia
-
-
-import sys, subprocess
-
-theident = 85
-bignodesize = 49
-
-class contig:
- def __init__(self, name, forseq, revseq):
- self.name = name
- self.forseq = forseq
- self.revseq = revseq
- self.to = []
- self.fr = []
- self.length = len(forseq)
-
-lg = open(sys.argv[1])
-getline = 0
-contigDict = {}
-edgelist = []
-templg = open('templg.fa', 'w')
-bignodes = set()
-count = 0
-for line in lg:
- if line.startswith('NODE'):
- name = line.split()[1]
- getline = 1
- elif getline == 1:
- forseq = line.rstrip()
- getline = 2
- elif getline == 2:
- revseq = line.rstrip()
- getline = 0
- aninstance = contig(name, forseq, revseq)
- if len(forseq) >= bignodesize:
- bignodes.add(name)
- templg.write('>' + name + '\n' + forseq + '\n')
- contigDict[name] = aninstance
- elif line.startswith('ARC'):
- to, fr = line.split()[1:3]
- if to[0] == '-':
- if fr not in contigDict[to[1:]].fr:
- contigDict[to[1:]].fr.append(fr)
- else:
- if fr not in contigDict[to].to:
- contigDict[to].to.append(fr)
- if to[0] == '-':
- to = to[1:]
- else:
- to = '-' + to
- if fr[0] == '-':
- if to not in contigDict[fr[1:]].to:
- contigDict[fr[1:]].to.append(to)
- else:
- if to not in contigDict[fr].fr:
- contigDict[fr].fr.append(to)
-templg.close()
-temped = open('tempedge.fa', 'w')
-todo = []
-outpaths = []
-for i in bignodes:
- for j in contigDict[i].to:
- todo.append([i, j])
- for j in contigDict[i].fr:
- todo.append(['-' + i, j])
-
-while len(todo) != 0:
- currpath = todo.pop()
- if currpath[-1].replace('-', '') in bignodes:
- outpaths.append(currpath)
- elif len(currpath) < 300:
- if currpath[-1][0] == '-':
- for i in contigDict[currpath[-1][1:]].fr:
- todo.append(currpath + [i])
- else:
- for i in contigDict[currpath[-1]].to:
- todo.append(currpath + [i])
-
-temppaths = []
-for i in outpaths:
- if len(i) > 2:
- lall = 0
- for j in i[1:-1]:
- if j[0] == '-':
- lall += contigDict[j[1:]].length
- else:
- lall += contigDict[j].length
- if lall <= 300:
- temppaths.append(i)
- else:
- temppaths.append(i)
- # print i
-
-count = 0
-for i in contigDict:
- contigDict[i].to = []
- contigDict[i].fr = []
-for i in temppaths:
- seq = ''
- for j in i:
- if j[0] == '-':
- seq += contigDict[j[1:]].revseq
- else:
- seq += contigDict[j].forseq
- edgelist.append((i[0].replace('-', ''), i[-1].replace('-', ''), len(seq)))
- if i[0][0] == '-':
- a = i[0][1:]
- if i[-1][0] == '-':
- b = i[-1][1:]
- contigDict[a].fr.append((b, False))
- contigDict[b].to.append((a, True))
- else:
- b = i[-1]
- contigDict[a].fr.append((b, True))
- contigDict[b].fr.append((a, True))
- else:
- a = i[0]
- if i[-1][0] == '-':
- b = i[-1][1:]
- contigDict[a].to.append((b, False))
- contigDict[b].to.append((a, False))
- else:
- b = i[-1]
- contigDict[a].to.append((b, True))
- contigDict[b].fr.append((a, False))
- temped.write('>' + str(count) + '\n' + seq + '\n')
- count += 1
-temped.close()
-
-
-
-first = True
-reflist = {}
-contiglist = {}
-for i in sys.argv[2:]:
- tempref = open(i)
- for line in tempref:
- if line.startswith('>'):
- if first:
- first = False
- else:
- reflist[name] = seq
- contiglist[name] = []
- name = line[1:].split()[0]
- seq = ''
- else:
- seq += line.rstrip()
- tempref.close()
-reflist[name] = seq
-contiglist[name] = []
-
-refout = open('tempref.fa', 'w')
-reflen = {}
-for i in reflist:
- refout.write('>' + i + '\n' + reflist[i] + reflist[i] + '\n')
- reflen[i] = len(reflist[i])
-refout.close()
-
-
-subprocess.Popen('makeblastdb -dbtype nucl -out tempdb -in tempref.fa', shell=True).wait()
-subprocess.Popen('blastn -task blastn -db tempdb -outfmt 6 -query templg.fa -out query_tempdb1.out', shell=True).wait()
-subprocess.Popen('blastn -task blastn -db tempdb -outfmt 6 -query tempedge.fa -out query_tempdb2.out', shell=True).wait()
-
-bout = open('query_tempdb1.out')
-inref = set()
-
-for line in bout:
- query, subject, ident, length, mismatch, indel, qStart, qEnd, rStart, rEnd, eVal, bitScore = line.split()
- ident = float(ident)
- length = int(length)
- qStart = int(qStart)
- qEnd = int(qEnd)
- rStart = int(rStart)
- rEnd = int(rEnd)
- if ident >= theident and qStart == 1 and qEnd == contigDict[query].length:
- inref.add(query)
- contiglist[subject].append((query, min([rStart, rEnd]), max([rStart, rEnd]), rStart < rEnd, ident, length))
-bout.close()
-bout = open('query_tempdb2.out')
-testset = set()
-allconts = set(contigDict)
-edges = set()
-edgedict ={}
-
-for i in contiglist:
- hitlist = contiglist[i]
- hitlist.sort(key=lambda x: x[-2], reverse=True)
- hitlist.sort(key=lambda x: x[-1], reverse=True)
- newhitlist = []
- for j in hitlist:
- getit = True
- for k in newhitlist:
- if j[1] >= k[1] and j[2] <= k[2]:
- getit = False
- break
- if getit:
- newhitlist.append(j)
- newhitlist.sort(key=lambda x: x[1])
- hitlist = newhitlist
- lasthit = None
- for j in hitlist:
- if lasthit != None:
- if not (j[0], not j[3], lasthit[0], not lasthit[3]) in edges and j[1] < lasthit[2] + 301:
- edges.add((lasthit[0], lasthit[3], j[0], j[3]))
- edgedict[(lasthit[0], lasthit[3], j[0], j[3])] = [lasthit, j]
- lasthit = j
-
-SP = 0
-for i in edges:
- #print i
- gotit = False
- if i[1]:
- for j in contigDict[i[0]].to:
- if j[0] == i[2] and j[1] == i[3]:
- gotit = True
- break
- else:
- for j in contigDict[i[0]].fr:
- if j[0] == i[2] and j[1] == i[3]:
- gotit = True
- break
- if gotit:
- SP += 1
-
-
-
-
-
-
-edgeinref = set()
-for line in bout:
- query, subject, ident, length, mismatch, indel, qStart, qEnd, rStart, rEnd, eVal, bitScore = line.split()
- ident = float(ident)
- length = int(length)
- qStart = int(qStart)
- qEnd = int(qEnd)
- rStart = int(rStart)
- rEnd = int(rEnd)
- if ident >= theident and length >= 0.98 * edgelist[int(query)][-1]:
- edgeinref.add(int(query))
-
-
-
-TP = 0
-FP = 0
-MA = 0
-for i in range(len(edgelist)):
- if edgelist[i][0] in inref and edgelist[i][1] in inref:
- if i in edgeinref:
- TP += 1
- else:
- FP += 1
- else:
- MA += 1
-
-print TP , FP , SP, len(edges) - SP
-print TP * 100.0 / (TP+FP), '% precision'
-print SP * 100.0 / len(edges), '% sensitivity'
diff --git a/contiguity/util/coif.py b/contiguity/util/coif.py
deleted file mode 100644
index 3fb7040..0000000
--- a/contiguity/util/coif.py
+++ /dev/null
@@ -1,955 +0,0 @@
-# COIF - Plasmid detection toolkit
-# Copyright (C) 2013-2015 Mitchell Sullivan
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-#
-# Mitchell Sullivan
-# mjsull@gmail.com
-# School of Chemistry & Molecular Biosciences
-# The University of Queensland
-# Brisbane, QLD 4072.
-# Australia
-
-
-import networkx
-import argparse
-import os
-import subprocess
-import string
-import sys
-import numpy as np
-
-transtab = string.maketrans('atcgATCG', 'tagcTAGC')
-
-
-# class containing all information for a contig
-class contig:
- def __init__(self, name, shortname, sequence, revseq=None, coverage=None):
- self.name = name
- self.shortname = shortname
- self.forseq = sequence.upper()
- if revseq == None:
- tempseq = self.forseq[::-1]
- self.revseq = tempseq.translate(transtab)
- else:
- self.revseq = revseq
- self.length = len(sequence)
- self.xlength = None
- if self.length >= 1000000000:
- self.strlen = str(round(self.length * 1.0 / 1000000000, 2)) + 'Gb'
- elif self.length >= 1000000:
- self.strlen = str(round(self.length * 1.0 / 1000000, 2)) + 'Mb'
- elif self.length >= 1000:
- self.strlen = str(self.length / 1000) + 'Kb'
- else:
- self.strlen = str(self.length) + 'bp'
- self.visible = False
- self.to = []
- self.fr = []
- if coverage is None:
- self.coverage = 'N/A'
- else:
- self.coverage = round(coverage, 2)
- try:
- self.coverage = float(name.split('_')[5])
- except:
- pass
- gcount = self.forseq.count('G')
- ccount = self.forseq.count('C')
- acount = self.forseq.count('A')
- tcount = self.forseq.count('T')
- self.gccontent = round((gcount + ccount) * 100.0 / self.length, 2)
- try:
- self.gcskew = round((gcount - ccount) * 1.0 / (gcount + ccount), 2)
- except ZeroDivisionError:
- self.gcskew = 0
- try:
- self.atskew = round((acount - tcount) * 1.0 / (acount + tcount), 2)
- except ZeroDivisionError:
- self.atskew = 0
-
-
-def load_fasta(self):
- fasta = open(args.basic)
- for line in fasta:
- if line.startswith('>'):
- if first:
- first = False
- else:
- aninstance = contig(name, name, seq)
- contigDict[name] = aninstance
- name = line.rstrip()[1:]
- seq = ''
- else:
- seq += line.rstrip()
- aninstance = contig(name, name, seq)
- contigDict[name] = aninstance
-
-# load a CAG file
-def load_cag(self):
- csag = open(args.cag_file)
- edgelist = []
- global contigDict
- for line in csag:
- if line.split()[0] == 'NODE':
- if len(line.split()) == 4:
- title, entry, name, seq = line.split()
- aninstance = contig(entry, name, seq)
- else:
- title, entry, name, coverage, seq = line.split()
- if coverage == 'N/A':
- coverage = None
- else:
- coverage = float(coverage)
- aninstance = contig(entry, name, seq, None, coverage)
- contigDict[entry] = aninstance
- elif line.split()[0] == 'EDGE':
- title, n1, d1, n2, d2, overlap = line.split()
- if overlap == '.':
- overlap = ''
- if d1 == 'True':
- d1 = True
- else:
- d1 = False
- if d2 == 'True':
- d2 = True
- else:
- d2 = False
- if overlap.isdigit():
- overlap = int(overlap)
- edgelist.append((n1, d1, n2, d2, overlap))
- for i in edgelist:
- contiga, dira, contigb, dirb, overlap = i
- if dira and dirb:
- contigDict[contiga].to.append((contigb, True, overlap))
- contigDict[contigb].fr.append((contiga, False, overlap))
- elif dira and not dirb:
- contigDict[contiga].to.append((contigb, False, overlap))
- contigDict[contigb].to.append((contiga, False, overlap))
- elif not dira and dirb:
- contigDict[contiga].fr.append((contigb, True, overlap))
- contigDict[contigb].fr.append((contiga, True, overlap))
- else:
- contigDict[contiga].fr.append((contigb, False, overlap))
- contigDict[contigb].to.append((contiga, True, overlap))
-
-# find best reference
-def predict_blast1(args):
- maxgot = 0
- maxgot2 = 0
- bestref2 = None
- bestref = None
- for i in os.listdir(args.blast1):
- reflen = 0
- ref = open(args.blast1 + '/' + i)
- for line in ref:
- if not line.startswith('>'):
- reflen += len(line.rstrip())
- ref.close()
- subprocess.Popen('makeblastdb -dbtype nucl -out ' + args.work_dir + '/tempdb -in ' + args.blast1 + '/' + i, shell=True, stdout=subprocess.PIPE).wait()
- subprocess.Popen('blastn -db ' + args.work_dir + '/tempdb -outfmt 6 -num_threads 8 -query ' + args.work_dir + '/contigs.fa -out ' + args.work_dir + '/contigs_tempdb.out', shell=True).wait()
- blast = open(args.work_dir + '/contigs_tempdb.out')
- gotset = set()
- querydict = {}
- for line in blast:
- query, subject, ident, length, mm, indel, qstart, qstop, rstart, rstop, eval, bitscore = line.split()
- qstart, qstop, rstart, rstop, length, mm = map(int, [qstart, qstop, rstart, rstop, length, mm])
- eval = float(eval)
- if eval <= 0.005:
- for j in range(min([rstart, rstop]), max([rstart, rstop]) + 1):
- gotset.add(j)
- if not query in querydict:
- querydict[query] = set()
- for j in range(qstart, qstop + 1):
- querydict[query].add(j)
- blast.close()
- aval, bval = 0, 0
- for j in contigDict:
- if j in querydict:
- aval += len(querydict[j])
- bval += contigDict[j].length
- gotset2 = aval * 1.0 / bval
- if len(gotset) * 1.0/ reflen + gotset2 >= maxgot:
- bestref2 = bestref
- maxgot2 = maxgot
- bestref = i
- maxgot = len(gotset) * 1.0 / reflen + gotset2
- elif len(gotset) * 1.0/reflen + gotset2 >= maxgot2:
- bestref2 = i
- maxgot2 = len(gotset) * 1.0 /reflen + gotset2
- return bestref2
-
-# write CAG to FASTA for perfoming BLAST etc.
-def write_fasta_cag(args):
- out = open(args.work_dir + '/contigs.fa', 'w')
- for i in contigDict:
- out.write('>' + i + '\n' + contigDict[i].forseq + '\n')
- out.close()
-
-# use selected reference to predict plasmid contigs
-def predict_blast2(args):
- subprocess.Popen('makeblastdb -dbtype nucl -out ' + args.work_dir + '/tempdb -in ' + args.blast2, shell=True, stdout=subprocess.PIPE).wait()
- subprocess.Popen('blastn -db ' + args.work_dir + '/tempdb -outfmt 6 -num_threads 8 -query ' + args.work_dir + '/contigs.fa -out ' + args.work_dir + '/contigs_tempdb.out', shell=True).wait()
- filtered = set()
- blast = open(args.work_dir + '/contigs_tempdb.out')
- for line in blast:
- query, subject, ident, length, mm, indel, qstart, qstop, rstart, rstop, eval, bitscore = line.split()
- qstart, qstop, rstart, rstop, length, mm = map(int, [qstart, qstop, rstart, rstop, length, mm])
- ident = float(ident)
- if length >= args.min_length and ident >= args.min_ident and length >= contigDict[query].length * args.min_len_fract:
- filtered.add(query)
- blast.close()
- candidates = set()
- for i in contigDict:
- if not i in filtered:
- candidates.add(i)
- return candidates
-
-# turn the contig dictionary into a Directional graph for networkx
-def contigDict2nx():
- dg = networkx.DiGraph()
- for i in contigDict:
- dg.add_node(i)
- dg.add_node('-' + i)
- for j in contigDict[i].to:
- if j[1]:
- dg.add_edge(i, j[0])
- else:
- dg.add_edge(i, '-' + j[0])
- for j in contigDict[i].fr:
- if j[1]:
- dg.add_edge('-' + i, j[0])
- else:
-
- dg.add_edge('-' + i, '-' + j[0])
- return dg
-
-# Find all simple paths between a list of candidates
-def find_paths(dg, candidates, args):
- outpaths = []
- listcan = list(candidates)
- while len(listcan) > 0:
- i = listcan[0]
- for j in listcan:
- paths = list(networkx.all_simple_paths(dg, i, j, args.max_path_node))
- for k in paths:
- pathseq = ''
- lastcontig = i
- lastcontigdir = True
- getit = True
- for l in k[1:-1]:
- if '-' in l:
- contig = l[1:]
- contigdir = False
- else:
- contig = l
- contigdir = True
- if contig in candidates:
- getit = False
- break
- if lastcontigdir:
- for m in contigDict[lastcontig].to:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- else:
- for m in contigDict[lastcontig].fr:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- if type(overlap) is int:
- if contigdir:
- pathseq += contigDict[contig].forseq[overlap:]
- else:
- pathseq += contigDict[contig].revseq[overlap:]
- else:
- pathseq += overlap
- if contigdir:
- pathseq += contigDict[contig].forseq
- else:
- pathseq += contigDict[contig].revseq
- lastcontig = contig
- lastcontigdir = contigdir
- if k[-1][0] == '-':
- contig = k[-1][1:]
- contigdir = False
- else:
- contig = k[-1]
- contigdir = True
- if lastcontigdir:
- for m in contigDict[lastcontig].to:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- else:
- for m in contigDict[lastcontig].fr:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- if getit:
- if type(overlap) is int:
- pathseq = pathseq[:-overlap]
- else:
- pathseq += overlap
- if len(pathseq) <= args.max_path_length:
- outpaths.append((k, pathseq))
- paths = list(networkx.all_simple_paths(dg, i, '-' + j, args.max_path_node))
- for k in paths:
- pathseq = ''
- lastcontig = i
- lastcontigdir = True
- getit = True
- for l in k[1:-1]:
- if '-' in l:
- contig = l[1:]
- contigdir = False
- else:
- contig = l
- contigdir = True
- if contig in candidates:
- getit = False
- break
- if lastcontigdir:
- for m in contigDict[lastcontig].to:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- else:
- for m in contigDict[lastcontig].fr:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- if type(overlap) is int:
- if contigdir:
- pathseq += contigDict[contig].forseq[overlap:]
- else:
- pathseq += contigDict[contig].revseq[overlap:]
- else:
- pathseq += overlap
- if contigdir:
- pathseq += contigDict[contig].forseq
- else:
- pathseq += contigDict[contig].revseq
- lastcontig = contig
- lastcontigdir = contigdir
- if k[-1][0] == '-':
- contig = k[-1][1:]
- contigdir = False
- else:
- contig = k[-1]
- contigdir = True
- if lastcontigdir:
- for m in contigDict[lastcontig].to:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- else:
- for m in contigDict[lastcontig].fr:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- if getit:
- if type(overlap) is int:
- pathseq = pathseq[:-overlap]
- else:
- pathseq += overlap
- if len(pathseq) <= args.max_path_length:
- outpaths.append((k, pathseq))
- paths = list(networkx.all_simple_paths(dg, '-' + i, j, args.max_path_node))
- for k in paths:
- pathseq = ''
- lastcontig = i
- lastcontigdir = False
- getit = True
- for l in k[1:-1]:
- if '-' in l:
- contig = l[1:]
- contigdir = False
- else:
- contig = l
- contigdir = True
- if contig in candidates:
- getit = False
- break
- if lastcontigdir:
- for m in contigDict[lastcontig].to:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- else:
- for m in contigDict[lastcontig].fr:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- if type(overlap) is int:
- if contigdir:
- pathseq += contigDict[contig].forseq[overlap:]
- else:
- pathseq += contigDict[contig].revseq[overlap:]
- else:
- pathseq += overlap
- if contigdir:
- pathseq += contigDict[contig].forseq
- else:
- pathseq += contigDict[contig].revseq
- lastcontig = contig
- lastcontigdir = contigdir
- if k[-1][0] == '-':
- contig = k[-1][1:]
- contigdir = False
- else:
- contig = k[-1]
- contigdir = True
- if lastcontigdir:
- for m in contigDict[lastcontig].to:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- else:
- for m in contigDict[lastcontig].fr:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- if getit:
- if type(overlap) is int:
- pathseq = pathseq[:-overlap]
- else:
- pathseq += overlap
- if len(pathseq) <= args.max_path_length:
- outpaths.append((k, pathseq))
- paths = list(networkx.all_simple_paths(dg, '-' + i, '-' + j, args.max_path_node))
- for k in paths:
- pathseq = ''
- lastcontig = i
- lastcontigdir = False
- getit = True
- for l in k[1:-1]:
- if '-' in l:
- contig = l[1:]
- contigdir = False
- else:
- contig = l
- contigdir = True
- if contig in candidates:
- getit = False
- break
- if lastcontigdir:
- for m in contigDict[lastcontig].to:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- else:
- for m in contigDict[lastcontig].fr:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- if type(overlap) is int:
- if contigdir:
- pathseq += contigDict[contig].forseq[overlap:]
- else:
- pathseq += contigDict[contig].revseq[overlap:]
- else:
- pathseq += overlap
- if contigdir:
- pathseq += contigDict[contig].forseq
- else:
- pathseq += contigDict[contig].revseq
- lastcontig = contig
- lastcontigdir = contigdir
- if k[-1][0] == '-':
- contig = k[-1][1:]
- contigdir = False
- else:
- contig = k[-1]
- contigdir = True
- if lastcontigdir:
- for m in contigDict[lastcontig].to:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- else:
- for m in contigDict[lastcontig].fr:
- if m[0] == contig and m[1] == contigdir:
- overlap = m[2]
- break
- if getit:
- if type(overlap) is int:
- pathseq = pathseq[:-overlap]
- else:
- pathseq += overlap
- if len(pathseq) <= args.max_path_length:
- outpaths.append((k, pathseq))
- listcan.pop(0)
- return outpaths
-
-
-
-# check paths with paired end mapping
-def check_paths(paths):
- pass
-
-
-# given a list of paths find the shortest
-def getShortest(paths):
- outpaths = {}
- for i in paths:
- sn = i[0][0]
- en = i[0][-1]
- if sn in outpaths:
- if en in outpaths[sn]:
- if len(i[1]) < len(outpaths[sn][en][1]):
- outpaths[sn][en] = i
- else:
- outpaths[sn][en] = i
- else:
- outpaths[sn] = {en:i}
- return outpaths
-
-
-
-def predict_blast3(filename):
- pass
-
-# improve predictions
-def improve_predict(candContigs):
- pass
-
-# predict small contigs
-def predict_small(candContigs):
- pass
-
-# remove similar paths
-def remove_dup(paths):
- pass
-
-
-# find all simple circuits using a graph of candidates
-def find_circuits(canddict):
- nx = networkx.DiGraph()
- for i in canddict:
- for j in canddict[i]:
- nx.add_edge(i, j)
- if i[0] == '-':
- newi = i[1:]
- else:
- newi = '-' + i
- if j[0] == '-':
- newj = j[1:]
- else:
- newj = '-' + j
- nx.add_edge(newj, newi)
- return networkx.simple_cycles(nx)
-
-
-
-def predict_contigs(canddict, plascan):
- sys.stdout.write('Trimming contigs not in circuit..\n')
- G = networkx.DiGraph()
- for i in canddict:
- for j in canddict[i]:
- G.add_edge(i, j)
- if i[0] == '-':
- newi = i[1:]
- else:
- newi = '-' + i
- if j[0] == '-':
- newj = j[1:]
- else:
- newj = '-' + j
- G.add_edge(newj, newi)
- sys.stdout.write(str(len(G.nodes())) + ' initial nodes in ' + str(networkx.number_connected_components(G.to_undirected())) + ' connected components.\n')
- notgotemall = True
- while notgotemall:
- notgotemall = False
- for i in plascan:
- if i in G:
- if len(G[i]) == 0 or len(G['-' + i]) == 0:
- notgotemall = True
- G.remove_node(i)
- G.remove_node('-' + i)
- sys.stdout.write(str(len(G.nodes())) + ' remaining nodes.\n')
- predset = set()
- for i in G.edges():
- if i[0] in canddict and i[1] in canddict[i[0]]:
- for j in canddict[i[0]][i[1]][0]:
- if j[0] == '-':
- predset.add(j[1:])
- else:
- predset.add(j)
- return predset
-
-def theil_sen(x,y):
- n = len(x)
- ord = np.argsort(x)
- xs = x[ord]
- ys = y[ord]
- vec1 = np.zeros( (n,n) )
- for ii in range(n):
- for jj in range(n):
- vec1[ii,jj] = ys[ii]-ys[jj]
- vec2 = np.zeros( (n,n) )
- for ii in range(n):
- for jj in range(n):
- vec2[ii,jj] = xs[ii]-xs[jj]
- v1 = vec1[vec2>0]
- v2 = vec2[vec2>0]
- slope = np.median( v1/v2 )
- coef = np.zeros( (2,1) )
- b_0 = np.median(y)-slope*np.median(x)
- b_1 = slope
- res = y-b_1*x-b_0 # residuals
- return (b_0,b_1)
-
-
-def predict_cov(args):
- plascan = set()
- count = 0
- for i in contigDict:
- if contigDict[i].length >= args.min_can_length:
- count += 1
- x = np.zeros(count)
- y = np.zeros(count)
- contignamelist = []
- index = 0
- for i in contigDict:
- if contigDict[i].length >= args.min_can_length:
- x[index] = contigDict[i].gccontent
- y[index] = contigDict[i].coverage
- contignamelist.append(i)
- index += 1
- thec, ther = theil_sen(x, y)
- lessthan = []
- for i in range(len(x)):
- if x[i] * ther + thec - y[i] >= 0:
- lessthan.append(x[i] * ther + thec - y[i])
- lessthansd = (sum(lessthan) * 1.0 / len(lessthan)) ** 0.5
- templessthan = []
- for i in lessthan:
- if i <= 4 * lessthansd:
- templessthan.append(i)
- lessthan = templessthan
- lessthan.sort()
- thecutoff = lessthan[int(0.95 * len(lessthan))]
- out = open(args.work_dir + '/coverage.csv', 'w')
- for i in contigDict:
- out.write(i + '\t' + str(contigDict[i].coverage) + '\t' + str(contigDict[i].gccontent) + '\t' + str(contigDict[i].length)
- + '\t' + str(contigDict[i].gccontent * ther + thec + thecutoff) + '\t' + str(contigDict[i].gccontent * ther + thec) + '\n')
- out.write('\n\n\n')
- for i in contigDict:
- if contigDict[i].length > args.min_can_length:
- out.write(i + '\t' + str(contigDict[i].coverage) + '\t' + str(contigDict[i].gccontent) + '\t' + str(contigDict[i].length)
- + '\t' + str(contigDict[i].gccontent * ther + thec + thecutoff) + '\t' + str(contigDict[i].gccontent * ther + thec) + '\n')
- for i in contigDict:
- contigDict[i].predScore = min([2, (contigDict[i].coverage - (contigDict[i].gccontent * ther + thec)) / thecutoff])
- if contigDict[i].length >= args.min_can_length and contigDict[i].coverage >= contigDict[i].gccontent * ther + thec + thecutoff:
- if args.filter_high_cov and contigDict[i].coverage <= contigDict[i].gccontent * ther * 2 + thec * 2 - thecutoff:
- plascan.add(i)
- elif not args.filter_high_cov:
- plascan.add(i)
- out.close()
- return plascan
-
-
-
-
-# scaffold
-def scaffold(candContigs):
- pass
-
-def best_guess(candContigs):
- pass
-
-
-def get_pred_qual(args, candidates, initit, onlyplas=False):
- first = True
- plasconts = set()
- chromconts = set()
- totalplaslengths = 0
- sharedbp, TPbp, FPbp, TNbp, FNbp, unmappedbp = 0, 0, 0, 0, 0, 0
- for i in args.debug:
- tempref = open(args.work_dir + '/tempseq.fa', 'w')
- inref = open(i)
- for line in inref:
- if line.startswith('>'):
- tempref.write(line)
- seq = ''
- else:
- seq += line.rstrip()
- tempref.write(seq + seq + '\n')
- tempref.close()
- thelen = len(seq)
- gotthem = set()
- gotthem2 = set()
- subprocess.Popen('makeblastdb -dbtype nucl -out ' + args.work_dir + '/tempdb -in ' + args.work_dir + '/tempseq.fa', shell=True, stdout=subprocess.PIPE).wait()
- subprocess.Popen('blastn -db ' + args.work_dir + '/tempdb -outfmt 6 -num_threads 8 -query ' + args.work_dir + '/contigs.fa -out ' + args.work_dir + '/contigs_tempdb.out', shell=True).wait()
- blast = open(args.work_dir + '/contigs_tempdb.out')
- for line in blast:
- query, subject, ident, length, mm, indel, qstart, qstop, rstart, rstop, eval, bitscore = line.split()
- qstart, qstop, rstart, rstop, length, mm = map(int, [qstart, qstop, rstart, rstop, length, mm])
- ident = float(ident)
- if length >= 0.99 * contigDict[query].length and ident >= 90.0:
- if first and not onlyplas:
- chromconts.add(query)
- else:
- plasconts.add(query)
- if min([rstart, rstop]) <= thelen:
- if query in candidates:
- for q in range(min([rstart, rstop]), max([rstart, rstop]) + 1):
- gotthem.add(q)
- for q in range(min([rstart, rstop]), max([rstart, rstop]) + 1):
- gotthem2.add(q)
- if not first or onlyplas:
- TPbp += len(gotthem)
- FNbp += len(gotthem2)
- totalplaslengths += thelen
- first = False
- shared, TP, FP, TN, FN, unmapped = 0, 0, 0, 0, 0, 0
- for i in contigDict:
- if i in chromconts and i in plasconts:
- shared += 1
- sharedbp += contigDict[i].length
- if i in candidates:
- TP += 1
- else:
- FN += 1
- elif i in chromconts:
- if i in candidates:
- FP += 1
- FPbp += contigDict[i].length
- else:
- TN += 1
- TNbp += contigDict[i].length
- elif i in plasconts:
- if i in candidates:
- TP += 1
- else:
- FN += 1
- else:
- unmapped += 1
- unmappedbp += contigDict[i].length
- if onlyplas:
- if i in candidates:
- FP += 1
- FPbp += contigDict[i].length
- else:
- TN += 1
- TNbp += contigDict[i].length
- if initit:
- out = open(args.work_dir + '/degbug.txt', 'w')
- else:
- out = open(args.work_dir + '/degbug.txt', 'a')
- #out.write('candidates\n' + '\t'.join(candidates) + '\n')
- if initit:
- out.write('predictive power initial set\ntp\tfp\ttn\tfn\tsensitivity\tprecision\tunmapped\tshared\n')
- else:
- out.write('predictive power final set\ntp\tfp\ttn\tfn\tsensitivity\tprecision\tunmapped\tshared\n')
- try:
- out.write('\t'.join(map(str, [TP, FP, TN, FN, TP * 1.0 / (TP+FN), TP * 1.0 / (TP+FP), unmapped, shared])) + '\n')
- except:
- out.write('\t'.join(map(str, [TP, FP, TN, FN, 0, 0, unmapped, shared])) + '\n')
- if initit:
- out.write('predictive power initial set (bp)\ntp\tfp\ttn\tfn\tsensitivity\tprecision\tunmapped\tshared\tplasass\n')
- else:
- out.write('predictive power final set (bp)\ntp\tfp\ttn\tfn\tsensitivity\tprecision\tunmapped\tshared\tplasass\n')
- try:
- out.write('\t'.join(map(str, [TPbp, FPbp, TNbp, FNbp - TPbp, TPbp * 1.0 / FNbp, TPbp * 1.0 / (TPbp+FPbp), unmappedbp, sharedbp, FNbp * 1.0 / totalplaslengths])) + '\n')
- except:
- out.write('\t'.join(map(str, [TPbp, FPbp, TNbp, FNbp - TPbp, 0, 0, unmappedbp, sharedbp, 0])) + '\n')
- out.close()
-
-# write list of candidates to FASTA file
-def write_cand(candidates, outfile):
- out = open(args.work_dir + '/' + outfile, 'w')
- for i in candidates:
- out.write(i + '\n')
- out.close()
-
-
-def main(args):
- global contigDict
- contigDict = {}
- if args.basic is None:
- load_cag(args)
- else:
- load_fasta(args)
- if os.path.exists(args.work_dir): # create a working directory if it doesn't exist
- if not os.path.isdir(args.work_dir):
- sys.stderr.write('Working directory is a file not a folder.\n')
- sys.exit()
- else:
- os.makedirs(args.work_dir)
- write_fasta_cag(args) # create a FASTA file of the graph file in the working directory
- if args.predict_cov: # if flag set predict initial candidates using coverage
- sys.stdout.write('Finding candidates.\n')
- candidates = predict_cov(args)
- sys.stdout.write(str(len(candidates)) + ' candidates found.\n')
- elif not args.blast1 is None:
- sys.stdout.write('Finding best reference..\n')
- bestref = predict_blast1(args)
- sys.stdout.write('Using reference ' + bestref + '\n')
- args.blast2 = args.blast1 + '/' + bestref
- sys.stdout.write('Finding candidates.\n')
- candidates = predict_blast2(args)
- sys.stdout.write(str(len(candidates)) + ' candidates found.\n')
- elif not args.blast_coverage is None:
- sys.stdout.write('Finding best reference..\n')
- bestref = predict_blast1(args)
- sys.stdout.write('Using reference ' + bestref + '\n')
- args.blast2 = args.blast1 + '/' + bestref
- sys.stdout.write('Finding candidates BLAST.\n')
- candidates = predict_blast2(args)
- sys.stdout.write(str(len(candidates)) + ' candidates found.\n')
- sys.stdout.write('Finding candidates.\n')
- candidates2 = predict_cov(args)
- sys.stdout.write(str(len(candidates2)) + ' candidates found.\n')
- elif not args.blast2 is None:
- sys.stdout.write('Finding candidates.\n')
- candidates = predict_blast2(args)
- sys.stdout.write(str(len(candidates)) + ' candidates found.\n')
- if not args.debug is None:
- get_pred_qual(args, candidates, True, args.only_plas)
- if args.blast_coverage is None:
- write_cand(candidates, 'candidates.txt')
- else:
- write_cand(candidates, 'BLAST_candidates.txt')
- write_cand(candidates2, 'coverage_candidates.txt')
- if args.basic is None:
- sys.stdout.write('Basic mode finished.')
- return
- sys.stdout.write('Creating graph\n')
- dg = contigDict2nx()
- sys.stdout.write('Graph created.\nFinding paths between candidates\n')
- if args.blast_coverage is None:
- paths = find_paths(dg, candidates, args)
- sys.stdout.write(str(len(paths)) + ' paths found.\n')
- if args.check_paths: # check paths with paired-end reads TODO
- pass
- sys.stdout.write('Finding shortest paths.\n')
- pathDict = getShortest(paths)
- count = 0
- for i in pathDict:
- count += len(pathDict[i])
- sys.stdout.write(str(count) + ' paths remaining.\nPredicting final set of contigs..\n')
- newcandidates = predict_contigs(pathDict, candidates)
- sys.stdout.write(str(len(newcandidates)) + ' predicted plasmid contigs.\n')
- newcandidates = getcolor(newcandidates, candidates, set(), set())
- else:
- paths = find_paths(dg, candidates, args)
- sys.stdout.write(str(len(paths)) + 'and ' + str(len(paths2)) + ' paths found.\n')
- if args.check_paths: # check paths with paired-end reads TODO
- pass
- sys.stdout.write('Finding shortest paths BLAST.\n')
- pathDict = getShortest(paths)
- count = 0
- for i in pathDict:
- count += len(pathDict[i])
- sys.stdout.write(str(count) + ' paths remaining.\nPredicting final set of contigs..\n')
- newcandidates = predict_contigs(pathDict, candidates)
- paths = find_paths(dg, candidates2, args)
- if args.check_paths: # check paths with paired-end reads TODO
- pass
- sys.stdout.write('Finding shortest paths Coverage.\n')
- pathDict = getShortest(paths)
- count = 0
- for i in pathDict:
- count += len(pathDict[i])
- sys.stdout.write(str(count) + ' paths remaining.\nPredicting final set of contigs..\n')
- newcandidates2 = predict_contigs(pathDict, candidates)
- newcandidates = getcolor(newcandidates, candidates, newcandidates2, candidates2)
- if not args.debug is None:
- get_pred_qual(args, newcandidates, False, args.only_plas)
- sys.stdout.write('Writing predicted contigs to file, thanks for using COIF.\n')
- write_cand_color(newcandidates, 'final_candidates.txt')
-
-
-
-
-
-
-parser = argparse.ArgumentParser(prog='coif.py', formatter_class=argparse.RawDescriptionHelpFormatter, description='''
-coif.py: A script for identifying plasmid contigs.
-
-COIF is run on a Contig Adjacency Graph (CAG) generated by Contiguity, for instructions on how to generate
-a CAG see Contiguity manual (in this repo)
-
-Find plasmid contigs by removing chromosomal contigs:
-coif.py -c assembly.cag -d working_dir -b2 reference_chromosome.fa
-
-Identical to above method but chooses best chromosome to use from a folder of chromosomes
-coif.py -c assembly.cag -d working_dir -b1 folder_of_reference_genomes
-
-Find plasmid using coverage of contigs:
-coif.py -c assembly.cag -d working_dir -pc
-
-Predict plasmids using a combination of both coverage and directory of references
-coif.py -c assembly.cag -d working_dir -bc folder_of_reference_genomes
-
-OUTPUT:
-COIF outputs a list of initially predicted contigs and a list of predictions in the final set.
-Predictions from the final set will generally be more sensitive and accurate.
-
-For the combination mode, all contigs are listed with a colour based on whether they were
-predicted in the initial or final set of each prediction method:
-
-                    Coverage
-BLAST               not predicted   in initial set   in final set   in both
-not predicted       #FFFFFF
-in initial set
-in final set
-in both
-
-
-''', epilog="Thanks for using COIF")
-parser.add_argument('-c', '--cag_file', action='store', help='CAG file of assembled contigs or scaffolds and graph')
-parser.add_argument('-b', '--basic', action='store', default=None, help='Only do initial prediction, do not improve predictions with graph information. Uses a FASTA file as input instead of a CAG.')
-parser.add_argument('-d', '--work_dir', action='store', help='Working directory')
-parser.add_argument('-b1', '--blast1', action='store', default=None, help='Find best reference from folder of references and use it to predict initial set of plasmid contigs.')
-parser.add_argument('-b2', '--blast2', action='store', default=None, help='Use reference to remove chromosomal contigs.')
-parser.add_argument('-pc', '--predict_cov', action='store_true', default=False, help='Use contig coverage to predict initial set of plasmid contigs.')
-parser.add_argument('-bc', '--blast_coverage', action='store', default=None, help='Find best reference from folder of references and use it AND coverage to predict initial set of plasmid contigs.')
-parser.add_argument('-db', '--debug', action='store', default=None, nargs='+', help='Give references to report performance of COIF [chromosome plas1 plas2 etc.].')
-parser.add_argument('-i', '--min_ident', action='store', type=float, default=80.0, help='Min identity of hits to use')
-parser.add_argument('-l', '--min_length', action='store', type=int, default=0, help='Min length of hits to use')
-parser.add_argument('-f', '--min_len_fract', action='store', type=float, default=0.1, help='Min length fraction of hits to use')
-parser.add_argument('-mp', '--max_path_length', action='store', type=int, default=15000, help='Max length (bp) of paths')
-parser.add_argument('-mn', '--max_path_node', action='store', type=int, default=10, help='Max number of nodes to search when finding paths')
-parser.add_argument('-cp', '--check_paths', action='store_true', default=False, help='Check paths with paired end reads')
-parser.add_argument('-op', '--only_plas', action='store_true', default=False, help='Only use plasmids for debug mode')
-parser.add_argument('-mc', '--min_can_length', action='store', type=int, default=500, help='Minimum contig length for initial predictions')
-parser.add_argument('-fh', '--filter_high_cov', action='store_true', default=True, help='Filter out high-coverage contigs')
-# parser.add_argument('-rf', '--read_file', action='store', help='read file')
-# parser.add_argument('-o', '--output_folder', action='store', help='output folder')
-# parser.add_argument('-k', '--kmer_size', action='store', type=int, default=31, help='k-mer size for finding adjacent contigs [31]')
-# parser.add_argument('-max_d', '--max_distance', action='store', type=int, default=300, help='maximum distance apart in the de bruijn graph for contigs to count as adjacent [300]')
-# parser.add_argument('-kmer_a', '--kmer_average', action='store', type=int, default=-1, help='All k-mers above half this value will be traversed [auto]')
-# parser.add_argument('-kmer_c', '--kmer_cutoff', action='store', type=int, default=-1, help='cutoff for k-mer values [auto]')
-# parser.add_argument('-ov', '--overlap', action='store', type=int, default=None, help='minimum overlap to create edge [kmer_size-1]')
-# parser.add_argument('-rl', '--min_read_length', action='store', type=int, default=75, help='Minimum read length [75]')
-# parser.add_argument('-max_mm', '--max_mismatch', action='store', type=int, default=2, help='maximum number of mismatches to count overlap [2]')
-# parser.add_argument('-lo', '--long_overlap_ident', action='store', type=int, default=85, help='minimum percent identity to create an edge where there is a long overlap [85]')
-# parser.add_argument('-mp', '--minimum_pairs_edge', action='store', type=int, default=2, help='Minimum pairs to create edge [2]')
-# parser.add_argument('-is', '--max_insert_size', action='store', type=int, default=600, help='Upper bound on insert size [600]')
-# parser.add_argument('-cl', '--command_line', action='store_true', default=False, help='Run contiguity in command line mode')
-# parser.add_argument('-no', '--no_overlap_edges', action='store_true', default=False, help='Don\'t get overlap edges')
-# parser.add_argument('-nd', '--no_db_edges', action='store_true', default=False, help='Don\'t get De Bruijn edges')
-# parser.add_argument('-np', '--no_paired_edges', action='store_true', default=False, help='Don\'t get paired-end edges')
-# parser.add_argument('-km', '--khmer', action='store_false', default=True, help='Don\'t use khmer for De Bruijn graph construction (not recommended)')
-# parser.add_argument('-nt', '--num_threads', action='store', type=int, default=1, help='Number of threads to use for hash table building with khmer and for mapping reads with bowtie')
-# parser.add_argument('-ht_s', '--ht_size', action='store', default='2e9', help='Hash table size, for more information check http://khmer.readthedocs.org/en/v1.1/choosing-table-sizes.html')
-# parser.add_argument('-ht_n', '--ht_number', action='store', type=int, default=4, help='Hash table number, for more information check http://khmer.readthedocs.org/en/v1.1/choosing-table-sizes.html')
-
-
-
-args = parser.parse_args()
-
-main(args)
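
The `getShortest` call above keeps only the best path per candidate pair; its real
implementation lives earlier in coif.py and is not shown in this hunk. Purely as an
illustration of the idea, here is a minimal sketch of endpoint-keyed shortest-path
selection (the shape of `pathDict` below is an assumption, not taken from the source):

    # Illustrative only -- NOT the coif.py implementation.
    # Assumes each path is a list of contig names and that two paths are
    # duplicates when they share the same start and end contig.
    def get_shortest_sketch(paths):
        best = {}
        for path in paths:
            key = (path[0], path[-1])
            if key not in best or len(path) < len(best[key]):
                best[key] = path
        # Group surviving paths by start contig, mirroring the counting loops above.
        pathDict = {}
        for (start, end), path in best.items():
            pathDict.setdefault(start, []).append(path)
        return pathDict
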
diff --git a/do_release.sh b/do_release.sh
deleted file mode 100755
index 0904b2b..0000000
--- a/do_release.sh
+++ /dev/null
@@ -1,85 +0,0 @@
-# Release script
-# Copyright (C) 2013-2015 Mitchell Jon Stanton-Cook
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# m.stantoncook@gmail.com
-# School of Chemistry & Molecular Biosciences
-# The University of Queensland
-# Brisbane, QLD 4072.
-# Australia
-
-
-# NOTE: $VERSION is referenced below; keep in sync with the version bumpversion produces
-VERSION=1.0.3
-
-# Perform an install-uninstall cycle
-pip uninstall Contiguity
-python setup.py install
-pip uninstall Contiguity
-python setup.py clean
-
-
-# Do all the versioning here.
-bumpversion patch
-
-
-# Clean, test, build the source distribution & pip install it
-# Need to get exit statuses here...
-python setup.py clean
-#python setup.py test
-#STATUS=`echo $?`
-#if [ $STATUS -eq 0 ]; then
-# echo ""
-#else
-# echo "Tests failed. Will not release"
-# exit
-#fi
-
-python setup.py sdist bdist_wheel
-pip install dist/Contiguity-$VERSION.tar.gz
-STATUS=$?
-if [ $STATUS -eq 0 ]; then
- echo ""
-else
- echo "Package is not pip installable. Will not release"
- exit
-fi
-
-
-# Docs
-# Need to get exit statuses here...
-cd docs
-make clean
-sphinx-apidoc -o API ../Contiguity
-mv API/* .
-rmdir API
-make html
-cd ..
-
-git push
-# tag & push the tag to github
-# Refuse to tag unless the working tree is clean
-GIT=`git status --porcelain`
-if [ -z "$GIT" ]; then
- git tag v$VERSION
- git push --tags
-else
- echo "Git not clean. Will not release"
- exit
-fi
-
-
-# Upload to PyPI & clean
-twine upload -u mscook -p $PYPIPASS dist/* && python setup.py clean
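
The clean-tree guard in the script above shells out to `git status`, whose
human-readable output varies between git versions. If that check were ever ported to
Python (the repository's main language), a sketch using the stable --porcelain output
could look like this (the helper name is hypothetical, not part of this repository):

    # check_clean.py -- hypothetical helper, shown only as a sketch.
    # `git status --porcelain` prints one line per changed file and nothing at
    # all for a clean tree, so empty output means it is safe to tag a release.
    import subprocess
    import sys

    def working_tree_clean():
        out = subprocess.check_output(['git', 'status', '--porcelain'])
        return out.strip() == b''

    if __name__ == '__main__':
        if not working_tree_clean():
            sys.stderr.write('Git not clean. Will not release\n')
            sys.exit(1)
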
diff --git a/docs/Contiguity.rst b/docs/Contiguity.rst
deleted file mode 100644
index b9c860c..0000000
--- a/docs/Contiguity.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-Contiguity package
-==================
-
-Subpackages
------------
-
-.. toctree::
-
- Contiguity.util
-
-Submodules
-----------
-
-Contiguity.Contiguity module
-----------------------------
-
-.. automodule:: Contiguity.Contiguity
- :members:
- :undoc-members:
- :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: Contiguity
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/docs/Contiguity.util.rst b/docs/Contiguity.util.rst
deleted file mode 100644
index 9005778..0000000
--- a/docs/Contiguity.util.rst
+++ /dev/null
@@ -1,38 +0,0 @@
-Contiguity.util package
-=======================
-
-Submodules
-----------
-
-Contiguity.util.checkCSAG module
---------------------------------
-
-.. automodule:: Contiguity.util.checkCSAG
- :members:
- :undoc-members:
- :show-inheritance:
-
-Contiguity.util.checkLG module
-------------------------------
-
-.. automodule:: Contiguity.util.checkLG
- :members:
- :undoc-members:
- :show-inheritance:
-
-Contiguity.util.coif module
----------------------------
-
-.. automodule:: Contiguity.util.coif
- :members:
- :undoc-members:
- :show-inheritance:
-
-
-Module contents
----------------
-
-.. automodule:: Contiguity.util
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/docs/manual/Contiguity_SS.png b/docs/Contiguity_SS.png
similarity index 100%
rename from docs/manual/Contiguity_SS.png
rename to docs/Contiguity_SS.png
diff --git a/docs/manual/Contiguity_manual.docx b/docs/Contiguity_manual.docx
similarity index 100%
rename from docs/manual/Contiguity_manual.docx
rename to docs/Contiguity_manual.docx
diff --git a/docs/manual/Contiguity_manual.pdf b/docs/Contiguity_manual.pdf
similarity index 100%
rename from docs/manual/Contiguity_manual.pdf
rename to docs/Contiguity_manual.pdf
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index 3008416..0000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,177 +0,0 @@
-# Makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS =
-SPHINXBUILD = sphinx-build
-PAPER =
-BUILDDIR = _build
-
-# User-friendly check for sphinx-build
-ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
-$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
-endif
-
-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
-
-help:
-	@echo "Please use \`make <target>' where <target> is one of"
- @echo " html to make standalone HTML files"
- @echo " dirhtml to make HTML files named index.html in directories"
- @echo " singlehtml to make a single large HTML file"
- @echo " pickle to make pickle files"
- @echo " json to make JSON files"
- @echo " htmlhelp to make HTML files and a HTML help project"
- @echo " qthelp to make HTML files and a qthelp project"
- @echo " devhelp to make HTML files and a Devhelp project"
- @echo " epub to make an epub"
- @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
- @echo " latexpdf to make LaTeX files and run them through pdflatex"
- @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
- @echo " text to make text files"
- @echo " man to make manual pages"
- @echo " texinfo to make Texinfo files"
- @echo " info to make Texinfo files and run them through makeinfo"
- @echo " gettext to make PO message catalogs"
- @echo " changes to make an overview of all changed/added/deprecated items"
- @echo " xml to make Docutils-native XML files"
- @echo " pseudoxml to make pseudoxml-XML files for display purposes"
- @echo " linkcheck to check all external links for integrity"
- @echo " doctest to run all doctests embedded in the documentation (if enabled)"
-
-clean:
- rm -rf $(BUILDDIR)/*
-
-html:
- $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-dirhtml:
- $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-singlehtml:
- $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
- @echo
- @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-pickle:
- $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
- @echo
- @echo "Build finished; now you can process the pickle files."
-
-json:
- $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
- @echo
- @echo "Build finished; now you can process the JSON files."
-
-htmlhelp:
- $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
- @echo
- @echo "Build finished; now you can run HTML Help Workshop with the" \
- ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-qthelp:
- $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
- @echo
- @echo "Build finished; now you can run "qcollectiongenerator" with the" \
- ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
- @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Contiguity.qhcp"
- @echo "To view the help file:"
- @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Contiguity.qhc"
-
-devhelp:
- $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
- @echo
- @echo "Build finished."
- @echo "To view the help file:"
- @echo "# mkdir -p $$HOME/.local/share/devhelp/Contiguity"
- @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Contiguity"
- @echo "# devhelp"
-
-epub:
- $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
- @echo
- @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
-
-latex:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo
- @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
- @echo "Run \`make' in that directory to run these through (pdf)latex" \
- "(use \`make latexpdf' here to do that automatically)."
-
-latexpdf:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through pdflatex..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-latexpdfja:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through platex and dvipdfmx..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-text:
- $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
- @echo
- @echo "Build finished. The text files are in $(BUILDDIR)/text."
-
-man:
- $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
- @echo
- @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
-
-texinfo:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo
- @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
- @echo "Run \`make' in that directory to run these through makeinfo" \
- "(use \`make info' here to do that automatically)."
-
-info:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo "Running Texinfo files through makeinfo..."
- make -C $(BUILDDIR)/texinfo info
- @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
-
-gettext:
- $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
- @echo
- @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
-
-changes:
- $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
- @echo
- @echo "The overview file is in $(BUILDDIR)/changes."
-
-linkcheck:
- $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
- @echo
- @echo "Link check complete; look for any errors in the above output " \
- "or in $(BUILDDIR)/linkcheck/output.txt."
-
-doctest:
- $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
- @echo "Testing of doctests in the sources finished, look at the " \
- "results in $(BUILDDIR)/doctest/output.txt."
-
-xml:
- $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
- @echo
- @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
-
-pseudoxml:
- $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
- @echo
- @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/docs/conf.py b/docs/conf.py
deleted file mode 100644
index 25e5ed9..0000000
--- a/docs/conf.py
+++ /dev/null
@@ -1,271 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Contiguity documentation build configuration file, created by
-# sphinx-quickstart on Tue Feb 24 14:23:38 2015.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-import sys
-import os
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
-
-# -- General configuration ------------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
- 'sphinx.ext.autodoc',
- 'sphinx.ext.doctest',
- 'sphinx.ext.intersphinx',
- 'sphinx.ext.todo',
- 'sphinx.ext.coverage',
- 'sphinx.ext.mathjax',
- 'sphinx.ext.ifconfig',
- 'sphinx.ext.viewcode',
-]
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix of source filenames.
-source_suffix = '.rst'
-
-# The encoding of source files.
-#source_encoding = 'utf-8-sig'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = u'Contiguity'
-copyright = u'2015, Mitchell Sullivan'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-version = '1.0.3'
-# The full version, including alpha/beta/rc tags.
-release = '1.0.3'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#language = None
-
-# There are two options for replacing |today|: either, you set today to some
-# non-false value, then it is used:
-#today = ''
-# Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-exclude_patterns = ['_build']
-
-# The reST default role (used for this markup: `text`) to use for all
-# documents.
-#default_role = None
-
-# If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
-
-# If true, the current module name will be prepended to all description
-# unit titles (such as .. function::).
-#add_module_names = True
-
-# If true, sectionauthor and moduleauthor directives will be shown in the
-# output. They are ignored by default.
-#show_authors = False
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
-
-# If true, keep warnings as "system message" paragraphs in the built documents.
-#keep_warnings = False
-
-
-# -- Options for HTML output ----------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-html_theme = 'default'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further. For a list of options available for each theme, see the
-# documentation.
-#html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
-
-# The name for this set of Sphinx documents. If None, it defaults to
-# " v documentation".
-#html_title = None
-
-# A shorter title for the navigation bar. Default is the same as html_title.
-#html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-#html_logo = None
-
-# The name of an image file (within the static path) to use as favicon of the
-# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-#html_favicon = None
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-# Add any extra paths that contain custom files (such as robots.txt or
-# .htaccess) here, relative to this directory. These files are copied
-# directly to the root of the documentation.
-#html_extra_path = []
-
-# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
-# using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
-
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-#html_use_smartypants = True
-
-# Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-#html_additional_pages = {}
-
-# If false, no module index is generated.
-#html_domain_indices = True
-
-# If false, no index is generated.
-#html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-#html_split_index = False
-
-# If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
-
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
-
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a <link> tag referring to it. The value of this option must be the
-# base URL from which the finished HTML is served.
-#html_use_opensearch = ''
-
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'Contiguitydoc'
-
-
-# -- Options for LaTeX output ---------------------------------------------
-
-latex_elements = {
-# The paper size ('letterpaper' or 'a4paper').
-#'papersize': 'letterpaper',
-
-# The font size ('10pt', '11pt' or '12pt').
-#'pointsize': '10pt',
-
-# Additional stuff for the LaTeX preamble.
-#'preamble': '',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-# author, documentclass [howto, manual, or own class]).
-latex_documents = [
- ('index', 'Contiguity.tex', u'Contiguity Documentation',
- u'Mitchell Sullivan', 'manual'),
-]
-
-# The name of an image file (relative to this directory) to place at the top of
-# the title page.
-#latex_logo = None
-
-# For "manual" documents, if this is true, then toplevel headings are parts,
-# not chapters.
-#latex_use_parts = False
-
-# If true, show page references after internal links.
-#latex_show_pagerefs = False
-
-# If true, show URL addresses after external links.
-#latex_show_urls = False
-
-# Documents to append as an appendix to all manuals.
-#latex_appendices = []
-
-# If false, no module index is generated.
-#latex_domain_indices = True
-
-
-# -- Options for manual page output ---------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
- ('index', 'contiguity', u'Contiguity Documentation',
- [u'Mitchell Sullivan'], 1)
-]
-
-# If true, show URL addresses after external links.
-#man_show_urls = False
-
-
-# -- Options for Texinfo output -------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-# dir menu entry, description, category)
-texinfo_documents = [
- ('index', 'Contiguity', u'Contiguity Documentation',
- u'Mitchell Sullivan', 'Contiguity', 'Tool for visualising assemblies',
- 'Miscellaneous'),
-]
-
-# Documents to append as an appendix to all manuals.
-#texinfo_appendices = []
-
-# If false, no module index is generated.
-#texinfo_domain_indices = True
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#texinfo_show_urls = 'footnote'
-
-# If true, do not generate a @detailmenu in the "Top" node's menu.
-#texinfo_no_detailmenu = False
-
-
-# Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {'http://docs.python.org/': None}
diff --git a/docs/index.rst b/docs/index.rst
deleted file mode 100644
index f84905a..0000000
--- a/docs/index.rst
+++ /dev/null
@@ -1,22 +0,0 @@
-.. Contiguity documentation master file, created by
- sphinx-quickstart on Tue Feb 24 14:23:38 2015.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
-
-Welcome to Contiguity's documentation!
-======================================
-
-Contents:
-
-.. toctree::
- :maxdepth: 2
-
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
-
diff --git a/docs/modules.rst b/docs/modules.rst
deleted file mode 100644
index efdac3d..0000000
--- a/docs/modules.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Contiguity
-==========
-
-.. toctree::
- :maxdepth: 4
-
- Contiguity
diff --git a/examples_files/example1.zip b/examples_files/example1.zip
new file mode 100644
index 0000000..f1ebac7
Binary files /dev/null and b/examples_files/example1.zip differ
diff --git a/requirements-dev.txt b/requirements-dev.txt
deleted file mode 100644
index dbcf58a..0000000
--- a/requirements-dev.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-Sphinx==1.2.3
-bumpversion==0.5.1
-wheel==0.24.0
-twine==1.4.0
diff --git a/setup.py b/setup.py
index 388c130..ef24ddb 100644
--- a/setup.py
+++ b/setup.py
@@ -1,94 +1,19 @@
-#!/usr/bin/env python
+# cx_Freeze setup file
-import os
import sys
-import glob
-
-# Try and import pip. We'll stop if it is not present
-try:
- import pip
-except ImportError:
- print "Installation of Contiguity requires pip. Please install it! See -"
- print "http://pip.readthedocs.org/en/latest/installing.html"
- sys.exit(1)
-
-from setuptools import setup
-
-__title__ = 'Contiguity'
-__version__ = '1.0.3'
-__description__ = "Tool for visualising assemblies"
-__author__ = 'Mitchell Sullivan'
-__license__ = 'GPLv3'
-__author_email__ = "mjsull@gmail.com"
-__url__ = 'https://github.com/BeatsonLab-MicrobialGenomics/Contiguity'
-
-
-# Helper functions
-if sys.argv[-1] == 'publish':
- print "Please use twine or do_release.sh"
- sys.exit()
-
-if sys.argv[-1] == 'clean':
- os.system('rm -rf Contiguity.egg-info build dist')
- sys.exit()
-
-if sys.argv[-1] == 'docs':
- os.system('cd docs && make html')
- sys.exit()
-
-
-packages = [__title__, ]
-
-requires = []
-with open('requirements.txt') as fin:
- lines = fin.readlines()
- for line in lines:
- requires.append(line.strip())
-
-# Build lists to package the docs
-html, sources, static = [], [], []
-html_f = glob.glob('docs/_build/html/*')
-accessory = glob.glob('docs/_build/html/*/*')
-for f in html_f:
- if os.path.isfile(f):
- html.append(f)
-for f in accessory:
- if f.find("_static") != -1:
- if os.path.isfile(f):
- static.append(f)
- elif f.find("_sources"):
- if os.path.isfile(f):
- sources.append(f)
-
-setup(
- name=__title__,
- version=__version__,
- description=__description__,
- long_description=open('README.rst').read(),
- author=__author__,
- author_email=__author_email__,
- url=__url__,
- packages=packages,
- test_suite="tests",
- package_dir={__title__.lower(): __title__},
- scripts=[__title__.lower()+'/'+__title__],
- package_data={},
- data_files=[('', ['LICENSE', 'requirements.txt', 'README.rst']),
- ('docs', html), ('docs/_static', static),
- ('docs/_sources', sources)],
- include_package_data=True,
- install_requires=requires,
- license=__license__,
- zip_safe=False,
- classifiers=('Development Status :: 4 - Beta',
- 'Environment :: X11 Applications',
- 'Intended Audience :: Science/Research',
- 'License :: OSI Approved',
- 'Natural Language :: English',
- 'Operating System :: POSIX :: Linux',
- 'Programming Language :: Python',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 2 :: Only',
- 'Topic :: Scientific/Engineering :: Bio-Informatics',
- 'Topic :: Scientific/Engineering :: Visualization',),
-)
+from cx_Freeze import setup, Executable
+
+# Dependencies are automatically detected, but it might need fine tuning.
+build_exe_options = {"packages": ["khmer"]}
+
+# GUI applications require a different base on Windows (the default is for a
+# console application).
+base = None
+if sys.platform == "win32":
+ base = "Win32GUI"
+
+setup(
+    name="Contiguity",
+    version="1.0.4",
+    description="Assembly graph construction and visualisation.",
+    options={"build_exe": build_exe_options},
+    executables=[Executable("Contiguity.py", base=base)])
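
As the comment in the new setup.py notes, cx_Freeze's automatic dependency detection
may need fine tuning. A sketch of a more explicit options dict follows (the module and
file names below are examples, not requirements taken from this repository):

    # Hypothetical fine-tuning of build_exe_options -- adjust to the real imports.
    build_exe_options = {
        "packages": ["khmer"],             # force-include whole packages
        "includes": ["Tkinter"],           # force-include single modules
        "excludes": ["unittest"],          # drop modules the app never uses
        "include_files": ["README.rst"],   # ship data files next to the binary
    }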