From bb488ba7d28d904a8bab5989c94f802f2bae047f Mon Sep 17 00:00:00 2001
From: EgorGuga
Date: Thu, 21 Mar 2024 17:21:00 +0300
Subject: [PATCH 1/6] Added Dockerfile

---
 Dockerfile | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..6530047
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,55 @@
+FROM phusion/baseimage:jammy-1.0.1
+
+# Build-time settings; override with --build-arg. Declared as ARG rather than
+# ENV so that they do not leak into the environment of running containers.
+ARG HTSLIB_VERSION=1.17
+ARG HTSLIB_URL=https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2
+
+ENV TERM=xterm-256color
+
+# Install the build dependencies and remove the apt package lists in the same
+# layer, so the lists are never stored in the image.
+RUN set -eux; \
+    apt-get update; \
+    apt-get install -y \
+        autoconf \
+        bzip2 \
+        gcc \
+        libbz2-dev \
+        libcurl4-openssl-dev \
+        liblzma-dev \
+        make \
+        wget \
+        zlib1g-dev; \
+    rm -rf /var/lib/apt/lists/*
+
+# Download, build and install HTSlib. The build tree is deleted in the layer
+# that created it, so it adds nothing to the image.
+RUN set -eux; \
+    mkdir -p /usr/local/src/htslib; \
+    cd /usr/local/src/htslib; \
+    wget "${HTSLIB_URL}"; \
+    tar -xf "htslib-${HTSLIB_VERSION}.tar.bz2"; \
+    cd "htslib-${HTSLIB_VERSION}"; \
+    autoreconf -i; \
+    ./configure; \
+    make; \
+    make install; \
+    cd /; \
+    rm -rf /usr/local/src/htslib
+
+# Copy the project sources only after the dependency layers: copied earlier,
+# any source change would invalidate the cached apt and HTSlib layers.
+COPY . /usr/local/src/minimap2_index_modifier
+
+# Build minimap2_index_modifier and install the resulting minimap2 binary.
+# The rm only hides the sources; they still occupy the COPY layer above.
+RUN set -eux; \
+    cd /usr/local/src/minimap2_index_modifier; \
+    make; \
+    cp minimap2 /usr/local/bin; \
+    cd /; \
+    rm -rf /usr/local/src/minimap2_index_modifier
+
+# Make the HTSlib shared library (default /usr/local prefix) loadable.
+ENV LD_LIBRARY_PATH=/usr/local/lib
From 4262250a56041b96fb7057206fdc1ae0d12305d9 Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Thu, 21 Mar 2024 17:21:16 +0300 Subject: [PATCH 2/6] Updated Readme.md --- README.md | 56 +++++++++++++++++++++++++++++---------------------- test/tests.md | 26 ++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 24 deletions(-) create mode 100644 test/tests.md diff --git a/README.md b/README.md index 440accc..f16819e 100644 --- a/README.md +++ b/README.md @@ -1,32 +1,40 @@ -## Build -install htslib 1.17 -make +Minimap2_index_modifier +======================= +Minimap2_index_modifier is a fork of alignment
tool [Minimap2](https://github.com/lh3/minimap2). +Unlike the original tool, this can use the variants defined in the VCF file when generating the index, for more accurate alignment. -## Make gz and gz.tbi from VCF -bgzip -c filename.vcf > filename.vcf.gz\ -tabix -p vcf filename.vcf.gz -## Run modified index creation -bgzip -c test.vcf > test.vcf.gz\ -tabix -p vcf test.vcf.gz\ -./minimap2 -d test/test.modified.mni --vcf-file-with-variants test/test_long_chr1.vcf.gz test/test.fasta +Minimap2_index_modifier can be used in the same way as the original minimap2. To create a modified index use additional parameter `--vcf-file-with-variants `. +> minimap2 -d index.mmi --vcf-file-with-variants input.vcf.gz reference.fasta -Use flag --parse-haplotype if your VCF contains phased haplotypes +Use flag `--parse-haplotype` if your VCF contains phased haplotypes. -## Run test +## Contents +* [Installation](#installation) + * [Compiling from source](#compiling-from-source) + * [Docker](#docker) +* [Pre-built indexes](#pre-built-indexes) +* [Tests](#tests) -### Empty test -./minimap2 -d test/test.mni test/test.fasta\ -./minimap2 -d test/test.modified.mni --vcf-file-with-variants test/empty.vcf.gz test/test.fasta\ -diff test/test.mni test/test.modified.mni +## Installation +### Compiling from source +To compile from source, use this version of tools: +* GCC/G++ 11.4.0+ +* HTSlib v1.17 -### The same test -./minimap2 -d test/test.mni test/test.fasta\ -./minimap2 -d test/test.modified.mni --vcf-file-with-variants test/test_long_chr1_the_same.vcf.gz test/test.fasta\ -diff test/test.mni test/test.modified.mni +Command to compile: +> cd minimap2_index_modifier && make + +### Docker +Clone this repository and build a Docker image as follows. +> docker build -t minimap2_index_modifier:2.24 . 
+ +## Pre-built indexes +This [link](https://nextcloud.ispras.ru/index.php/s/wcb9PpZyr8Gb5CC) contains pre-built modified indexes for the following references: +* GRCh38 [(GCA_000001405.15)](https://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/release/references/GRCh38/) +* GRCh37 [(hs37d5)](https://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/release/references/GRCh37/) + +## Tests +See [test/tests.md](test/tests.md) for more details. -### Not the same test -./minimap2 -d test/test.mni test/test.fasta\ -./minimap2 -d test/test.modified.mni --vcf-file-with-variants test/test_long_chr1_not_the_same.vcf.gz test/test.fasta\ -diff test/test.mni test/test.modified.mni diff --git a/test/tests.md b/test/tests.md new file mode 100644 index 0000000..a0628dd --- /dev/null +++ b/test/tests.md @@ -0,0 +1,26 @@ +## Tests for minimap2_index_modifier +There are three tests to check base functionality. + +### Common test +This test creates two index files: regular and modified. +> minimap2 -d test/test.mni test/test.fasta +> minimap2 -d test/test.modified.mni --vcf-file-with-variants test/test_long_chr1_not_the_same.vcf.gz test/test.fasta +> diff test/test.mni test/test.modified.mni + +Modified index file should contain extra string and some strings should be mismatched. + +### Empty VCF test +If the input VCF file contains no variants from the reference, the modified index would be the same as the regular one. +> minimap2 -d test/test.mni test/test.fasta +> minimap2 -d test/test.modified.mni --vcf-file-with-variants test/empty.vcf.gz test/test.fasta +> diff test/test.mni test/test.modified.mni + +Index files should be the same (empty output after third command). + +### Pseudo-variants test +In this test `test/test_long_chr1_the_same.vcf.gz` contains pseudo-variants (like A -> A, C -> C, etc). These variants would be processed with no effect. 
+> minimap2 -d test/test.mni test/test.fasta +> minimap2 -d test/test.modified.mni --vcf-file-with-variants test/test_long_chr1_the_same.vcf.gz test/test.fasta +> diff test/test.mni test/test.modified.mni + +Index files should be the same. From 08f8858857e102e502d089b7cf619b8140a1e1b4 Mon Sep 17 00:00:00 2001 From: EgorGuga Date: Thu, 21 Mar 2024 17:28:21 +0300 Subject: [PATCH 3/6] Updated Readme.md --- README.md | 12 +++++++++--- test/tests.md | 24 +++++++++++++++--------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index f16819e..c61e621 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,9 @@ Unlike the original tool, this can use the variants defined in the VCF file when Minimap2_index_modifier can be used in the same way as the original minimap2. To create a modified index use additional parameter `--vcf-file-with-variants `. -> minimap2 -d index.mmi --vcf-file-with-variants input.vcf.gz reference.fasta +```bash +minimap2 -d index.mmi --vcf-file-with-variants input.vcf.gz reference.fasta +``` Use flag `--parse-haplotype` if your VCF contains phased haplotypes. @@ -24,11 +26,15 @@ To compile from source, use this version of tools: * HTSlib v1.17 Command to compile: -> cd minimap2_index_modifier && make +```bash +cd minimap2_index_modifier && make +``` ### Docker Clone this repository and build a Docker image as follows. -> docker build -t minimap2_index_modifier:2.24 . +```bash +docker build -t minimap2_index_modifier:2.24 . +``` ## Pre-built indexes This [link](https://nextcloud.ispras.ru/index.php/s/wcb9PpZyr8Gb5CC) contains pre-built modified indexes for the following references: diff --git a/test/tests.md b/test/tests.md index a0628dd..01ee755 100644 --- a/test/tests.md +++ b/test/tests.md @@ -3,24 +3,30 @@ There are three tests to check base functionality. ### Common test This test creates two index files: regular and modified. 
-> minimap2 -d test/test.mni test/test.fasta -> minimap2 -d test/test.modified.mni --vcf-file-with-variants test/test_long_chr1_not_the_same.vcf.gz test/test.fasta -> diff test/test.mni test/test.modified.mni +```bash +minimap2 -d test/test.mni test/test.fasta +minimap2 -d test/test.modified.mni --vcf-file-with-variants test/test_long_chr1_not_the_same.vcf.gz test/test.fasta +diff test/test.mni test/test.modified.mni +``` Modified index file should contain extra string and some strings should be mismatched. ### Empty VCF test If the input VCF file contains no variants from the reference, the modified index would be the same as the regular one. -> minimap2 -d test/test.mni test/test.fasta -> minimap2 -d test/test.modified.mni --vcf-file-with-variants test/empty.vcf.gz test/test.fasta -> diff test/test.mni test/test.modified.mni +```bash +minimap2 -d test/test.mni test/test.fasta +minimap2 -d test/test.modified.mni --vcf-file-with-variants test/empty.vcf.gz test/test.fasta +diff test/test.mni test/test.modified.mni +``` Index files should be the same (empty output after third command). ### Pseudo-variants test In this test `test/test_long_chr1_the_same.vcf.gz` contains pseudo-variants (like A -> A, C -> C, etc). These variants would be processed with no effect. -> minimap2 -d test/test.mni test/test.fasta -> minimap2 -d test/test.modified.mni --vcf-file-with-variants test/test_long_chr1_the_same.vcf.gz test/test.fasta -> diff test/test.mni test/test.modified.mni +```bash +minimap2 -d test/test.mni test/test.fasta +minimap2 -d test/test.modified.mni --vcf-file-with-variants test/test_long_chr1_the_same.vcf.gz test/test.fasta +diff test/test.mni test/test.modified.mni +``` Index files should be the same. 
From 816dbc04911380fd84ec16215e83c7f14b9f8bb1 Mon Sep 17 00:00:00 2001 From: Egor Guguchkin <56133096+EgorGuga@users.noreply.github.com> Date: Thu, 21 Mar 2024 17:29:59 +0300 Subject: [PATCH 4/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c61e621..6d5d20e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ Minimap2_index_modifier ======================= -Minimap2_index_modifier is a fork of alignment tool [Minimap2](https://github.com/lh3/minimap2). +Minimap2_index_modifier is a fork of alignment tool [minimap2](https://github.com/lh3/minimap2). Unlike the original tool, this can use the variants defined in the VCF file when generating the index, for more accurate alignment. From 8bbe299a8c258cfc90965e7436175d23d5bc68ed Mon Sep 17 00:00:00 2001 From: Egor Guguchkin <56133096+EgorGuga@users.noreply.github.com> Date: Thu, 21 Mar 2024 17:30:24 +0300 Subject: [PATCH 5/6] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6d5d20e..7a8a7aa 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ Minimap2_index_modifier is a fork of alignment tool [minimap2](https://github.co Unlike the original tool, this can use the variants defined in the VCF file when generating the index, for more accurate alignment. -Minimap2_index_modifier can be used in the same way as the original minimap2. To create a modified index use additional parameter `--vcf-file-with-variants `. +Minimap2_index_modifier can be used in the same way as the original minimap2. +To create a modified index use additional parameter `--vcf-file-with-variants `. 
```bash minimap2 -d index.mmi --vcf-file-with-variants input.vcf.gz reference.fasta ``` From 3b3dcc35d369c8b50709446f5b4d3fd3afbcec79 Mon Sep 17 00:00:00 2001 From: Egor Guguchkin <56133096+EgorGuga@users.noreply.github.com> Date: Mon, 1 Apr 2024 16:30:16 +0300 Subject: [PATCH 6/6] Rename tests.md to README.md --- test/{tests.md => README.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/{tests.md => README.md} (100%) diff --git a/test/tests.md b/test/README.md similarity index 100% rename from test/tests.md rename to test/README.md