diff --git a/.gitignore b/.gitignore
index d5a6dd6..f7bf712 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+.vscode/
+htslib/
 .cproject
 .project
 .*.swp
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..6530047
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,49 @@
+FROM phusion/baseimage:jammy-1.0.1
+
+ARG HTSLIB_VERSION=1.17
+
+ENV TERM=xterm-256color \
+    HTSLIB_URL=https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2
+
+COPY . minimap2_index_modifier
+
+# install deps and cleanup apt garbage
+RUN set -eux; \
+  apt-get update; \
+  apt-get install -y \
+    wget \
+    make \
+    autoconf \
+    gcc \
+    libcurl4-openssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    liblzma-dev \
+    bzip2 \
+  && rm -rf /var/lib/apt/lists/*;
+
+#install htslib
+RUN set -eux; \
+mkdir temp; \
+cd temp; \
+\
+wget ${HTSLIB_URL}; \
+tar -xf htslib-${HTSLIB_VERSION}.tar.bz2; \
+cd htslib-${HTSLIB_VERSION}; \
+\
+autoreconf -i; \
+./configure; \
+make; \
+make install; \
+cd ../../; \
+rm -rf temp;
+
+#install minimap2_index_modifier
+RUN set -eux; \
+cd minimap2_index_modifier; \
+make; \
+cp minimap2 /usr/local/bin; \
+cd ../; \
+rm -rf minimap2_index_modifier;
+
+ENV LD_LIBRARY_PATH=/usr/local/lib
diff --git a/Makefile b/Makefile
index 4118616..08a38a1 100644
--- a/Makefile
+++ b/Makefile
@@ -2,11 +2,11 @@ CFLAGS=		-g -Wall -O2 -Wc++-compat #-Wextra
 CPPFLAGS=	-DHAVE_KALLOC
 INCLUDES=
 OBJS=		kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
-			lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
+			lchain.o align.o hit.o seed.o linked_vcf_list.o map.o format.o pe.o esterr.o splitidx.o \
 			ksw2_ll_sse.o
 PROG=		minimap2
 PROG_EXTRA=	sdust minimap2-lite
-LIBS=		-lm -lz -lpthread
+LIBS=		-lm -lz -lpthread -lhts
 
 ifeq ($(arm_neon),) # if arm_neon is not defined
 ifeq ($(sse2only),) # if sse2only is not defined
@@ -53,7 +53,7 @@ minimap2-lite:example.o libminimap2.a
 libminimap2.a:$(OBJS)
 		$(AR) -csru $@ $(OBJS)
 
-sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h
+sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h linked_vcf_list.h
 		$(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz
 
 # SSE-specific targets on x86/x86_64
@@ -128,5 +128,6 @@ options.o: mmpriv.h minimap.h bseq.h kseq.h
 pe.o: mmpriv.h minimap.h bseq.h kseq.h kvec.h kalloc.h ksort.h
 sdust.o: kalloc.h kdq.h kvec.h sdust.h
 seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
-sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
+linked_vcf_list.o: mmpriv.h minimap.h linked_vcf_list.h
+sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h linked_vcf_list.h
 splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
diff --git a/README.md b/README.md
index 13c4938..fc64f5d 100644
--- a/README.md
+++ b/README.md
@@ -1,403 +1,53 @@
-[![GitHub Downloads](https://img.shields.io/github/downloads/lh3/minimap2/total.svg?style=social&logo=github&label=Download)](https://github.com/lh3/minimap2/releases)
-[![BioConda Install](https://img.shields.io/conda/dn/bioconda/minimap2.svg?style=flag&label=BioConda%20install)](https://anaconda.org/bioconda/minimap2)
-[![PyPI](https://img.shields.io/pypi/v/mappy.svg?style=flat)](https://pypi.python.org/pypi/mappy)
-[![Build Status](https://github.com/lh3/minimap2/actions/workflows/ci.yaml/badge.svg)](https://github.com/lh3/minimap2/actions)
-## <a name="started"></a>Getting Started
-```sh
-git clone https://github.com/lh3/minimap2
-cd minimap2 && make
-# long sequences against a reference genome
-./minimap2 -a test/MT-human.fa test/MT-orang.fa > test.sam
-# create an index first and then map
-./minimap2 -x map-ont -d MT-human-ont.mmi test/MT-human.fa
-./minimap2 -a MT-human-ont.mmi test/MT-orang.fa > test.sam
-# use presets (no test data)
-./minimap2 -ax map-pb ref.fa pacbio.fq.gz > aln.sam       # PacBio CLR genomic reads
-./minimap2 -ax map-ont ref.fa ont.fq.gz > aln.sam         # Oxford Nanopore genomic reads
-./minimap2 -ax map-hifi ref.fa pacbio-ccs.fq.gz > aln.sam # PacBio HiFi/CCS genomic reads (v2.19 or later)
-./minimap2 -ax asm20 ref.fa pacbio-ccs.fq.gz > aln.sam    # PacBio HiFi/CCS genomic reads (v2.18 or earlier)
-./minimap2 -ax sr ref.fa read1.fa read2.fa > aln.sam      # short genomic paired-end reads
-./minimap2 -ax splice ref.fa rna-reads.fa > aln.sam       # spliced long reads (strand unknown)
-./minimap2 -ax splice -uf -k14 ref.fa reads.fa > aln.sam  # noisy Nanopore Direct RNA-seq
-./minimap2 -ax splice:hq -uf ref.fa query.fa > aln.sam    # Final PacBio Iso-seq or traditional cDNA
-./minimap2 -ax splice --junc-bed anno.bed12 ref.fa query.fa > aln.sam  # prioritize on annotated junctions
-./minimap2 -cx asm5 asm1.fa asm2.fa > aln.paf             # intra-species asm-to-asm alignment
-./minimap2 -x ava-pb reads.fa reads.fa > overlaps.paf     # PacBio read overlap
-./minimap2 -x ava-ont reads.fa reads.fa > overlaps.paf    # Nanopore read overlap
-# man page for detailed command line options
-man ./minimap2.1
-```
-
-## Table of Contents
-
-- [Getting Started](#started)
-- [Users' Guide](#uguide)
-  - [Installation](#install)
-  - [General usage](#general)
-  - [Use cases](#cases)
-    - [Map long noisy genomic reads](#map-long-genomic)
-    - [Map long mRNA/cDNA reads](#map-long-splice)
-    - [Find overlaps between long reads](#long-overlap)
-    - [Map short accurate genomic reads](#short-genomic)
-    - [Full genome/assembly alignment](#full-genome)
-  - [Advanced features](#advanced)
-    - [Working with >65535 CIGAR operations](#long-cigar)
-    - [The cs optional tag](#cs)
-    - [Working with the PAF format](#paftools)
-  - [Algorithm overview](#algo)
-  - [Getting help](#help)
-  - [Citing minimap2](#cite)
-- [Developers' Guide](#dguide)
-- [Limitations](#limit)
-
-## <a name="uguide"></a>Users' Guide
-
-Minimap2 is a versatile sequence alignment program that aligns DNA or mRNA
-sequences against a large reference database. Typical use cases include: (1)
-mapping PacBio or Oxford Nanopore genomic reads to the human genome; (2)
-finding overlaps between long reads with error rate up to ~15%; (3)
-splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA reads
-against a reference genome; (4) aligning Illumina single- or paired-end reads;
-(5) assembly-to-assembly alignment; (6) full-genome alignment between two
-closely related species with divergence below ~15%.
+Minimap2_index_modifier
+=======================
+Minimap2_index_modifier is a fork of alignment tool [minimap2](https://github.com/lh3/minimap2).
+Unlike the original tool, this can use the variants defined in the VCF file when generating the index, for more accurate alignment.
 
-For ~10kb noisy reads sequences, minimap2 is tens of times faster than
-mainstream long-read mappers such as BLASR, BWA-MEM, NGMLR and GMAP. It is more
-accurate on simulated long reads and produces biologically meaningful alignment
-ready for downstream analyses. For >100bp Illumina short reads, minimap2 is
-three times as fast as BWA-MEM and Bowtie2, and as accurate on simulated data.
-Detailed evaluations are available from the [minimap2 paper][doi] or the
-[preprint][preprint].
 
-### <a name="install"></a>Installation
-
-Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
-the [release page][release] with:
-```sh
-curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar -jxvf -
-./minimap2-2.24_x64-linux/minimap2
+Minimap2_index_modifier can be used in the same way as the original minimap2.  
+To create a modified index use additional parameter `--vcf-file-with-variants <vcf-file>`.
+```bash
+minimap2 -d index.mmi --vcf-file-with-variants input.vcf.gz reference.fasta
 ```
-If you want to compile from the source, you need to have a C compiler, GNU make
-and zlib development files installed. Then type `make` in the source code
-directory to compile. If you see compilation errors, try `make sse2only=1`
-to disable SSE4 code, which will make minimap2 slightly slower.
 
-Minimap2 also works with ARM CPUs supporting the NEON instruction sets. To
-compile for 32 bit ARM architectures (such as ARMv7), use `make arm_neon=1`. To
-compile for for 64 bit ARM architectures (such as ARMv8), use `make arm_neon=1
-aarch64=1`.
+Use flag `--parse-haplotype` if your VCF contains phased haplotypes.
 
-Minimap2 can use [SIMD Everywhere (SIMDe)][simde] library for porting
-implementation to the different SIMD instruction sets. To compile using SIMDe,
-use `make -f Makefile.simde`. To compile for ARM CPUs, use `Makefile.simde`
-with the ARM related command lines given above.
+## Contents
+* [Installation](#installation)
+  * [Compiling from source](#compiling-from-source)
+  * [Docker](#docker)
+  * [Dockerhub](#dockerhub)
+* [Pre-built indexes](#pre-built-indexes)
+* [Tests](#tests)
 
-### <a name="general"></a>General usage
+## Installation
+### Compiling from source
+To compile from source, use this version of tools:
 
-Without any options, minimap2 takes a reference database and a query sequence
-file as input and produce approximate mapping, without base-level alignment
-(i.e. coordinates are only approximate and no CIGAR in output), in the [PAF format][paf]:
-```sh
-minimap2 ref.fa query.fq > approx-mapping.paf
-```
-You can ask minimap2 to generate CIGAR at the `cg` tag of PAF with:
-```sh
-minimap2 -c ref.fa query.fq > alignment.paf
-```
-or to output alignments in the [SAM format][sam]:
-```sh
-minimap2 -a ref.fa query.fq > alignment.sam
-```
-Minimap2 seamlessly works with gzip'd FASTA and FASTQ formats as input. You
-don't need to convert between FASTA and FASTQ or decompress gzip'd files first.
+* GCC/G++ 11.4.0+
+* HTSlib v1.17
 
-For the human reference genome, minimap2 takes a few minutes to generate a
-minimizer index for the reference before mapping. To reduce indexing time, you
-can optionally save the index with option **-d** and replace the reference
-sequence file with the index file on the minimap2 command line:
-```sh
-minimap2 -d ref.mmi ref.fa                     # indexing
-minimap2 -a ref.mmi reads.fq > alignment.sam   # alignment
+Command to compile:
+```bash
+cd minimap2_index_modifier && make
 ```
-***Importantly***, it should be noted that once you build the index, indexing
-parameters such as **-k**, **-w**, **-H** and **-I** can't be changed during
-mapping. If you are running minimap2 for different data types, you will
-probably need to keep multiple indexes generated with different parameters.
-This makes minimap2 different from BWA which always uses the same index
-regardless of query data types.
 
-### <a name="cases"></a>Use cases
-
-Minimap2 uses the same base algorithm for all applications. However, due to the
-different data types it supports (e.g. short vs long reads; DNA vs mRNA reads),
-minimap2 needs to be tuned for optimal performance and accuracy. It is usually
-recommended to choose a preset with option **-x**, which sets multiple
-parameters at the same time. The default setting is the same as `map-ont`.
-
-#### <a name="map-long-genomic"></a>Map long noisy genomic reads
-
-```sh
-minimap2 -ax map-pb  ref.fa pacbio-reads.fq > aln.sam   # for PacBio CLR reads
-minimap2 -ax map-ont ref.fa ont-reads.fq > aln.sam      # for Oxford Nanopore reads
+### Docker
+Clone this repository and build a Docker image as follows.
+```bash
+docker build -t minimap2_index_modifier:2.24 .
 ```
-The difference between `map-pb` and `map-ont` is that `map-pb` uses
-homopolymer-compressed (HPC) minimizers as seeds, while `map-ont` uses ordinary
-minimizers as seeds. Emperical evaluation suggests HPC minimizers improve
-performance and sensitivity when aligning PacBio CLR reads, but hurt when aligning
-Nanopore reads.
-
-#### <a name="map-long-splice"></a>Map long mRNA/cDNA reads
 
-```sh
-minimap2 -ax splice:hq -uf ref.fa iso-seq.fq > aln.sam       # PacBio Iso-seq/traditional cDNA
-minimap2 -ax splice ref.fa nanopore-cdna.fa > aln.sam        # Nanopore 2D cDNA-seq
-minimap2 -ax splice -uf -k14 ref.fa direct-rna.fq > aln.sam  # Nanopore Direct RNA-seq
-minimap2 -ax splice --splice-flank=no SIRV.fa SIRV-seq.fa    # mapping against SIRV control
+### Dockerhub
+Clone Docker image from Dockerhub.
+```bash
+docker pull egorguga/minimap2_index_modifier:2.24
 ```
-There are different long-read RNA-seq technologies, including tranditional
-full-length cDNA, EST, PacBio Iso-seq, Nanopore 2D cDNA-seq and Direct RNA-seq.
-They produce data of varying quality and properties. By default, `-x splice`
-assumes the read orientation relative to the transcript strand is unknown. It
-tries two rounds of alignment to infer the orientation and write the strand to
-the `ts` SAM/PAF tag if possible. For Iso-seq, Direct RNA-seq and tranditional
-full-length cDNAs, it would be desired to apply `-u f` to force minimap2 to
-consider the forward transcript strand only. This speeds up alignment with
-slight improvement to accuracy. For noisy Nanopore Direct RNA-seq reads, it is
-recommended to use a smaller k-mer size for increased sensitivity to the first
-or the last exons.
-
-Minimap2 rates an alignment by the score of the max-scoring sub-segment,
-*excluding* introns, and marks the best alignment as primary in SAM. When a
-spliced gene also has unspliced pseudogenes, minimap2 does not intentionally
-prefer spliced alignment, though in practice it more often marks the spliced
-alignment as the primary. By default, minimap2 outputs up to five secondary
-alignments (i.e. likely pseudogenes in the context of RNA-seq mapping). This
-can be tuned with option **-N**.
-
-For long RNA-seq reads, minimap2 may produce chimeric alignments potentially
-caused by gene fusions/structural variations or by an intron longer than the
-max intron length **-G** (200k by default). For now, it is not recommended to
-apply an excessively large **-G** as this slows down minimap2 and sometimes
-leads to false alignments.
-
-It is worth noting that by default `-x splice` prefers GT[A/G]..[C/T]AG
-over GT[C/T]..[A/G]AG, and then over other splicing signals. Considering
-one additional base improves the junction accuracy for noisy reads, but
-reduces the accuracy when aligning against the widely used SIRV control data.
-This is because SIRV does not honor the evolutionarily conservative splicing
-signal. If you are studying SIRV, you may apply `--splice-flank=no` to let
-minimap2 only model GT..AG, ignoring the additional base.
-
-Since v2.17, minimap2 can optionally take annotated genes as input and
-prioritize on annotated splice junctions. To use this feature, you can 
-```sh
-paftools.js gff2bed anno.gff > anno.bed
-minimap2 -ax splice --junc-bed anno.bed ref.fa query.fa > aln.sam
-```
-Here, `anno.gff` is the gene annotation in the GTF or GFF3 format (`gff2bed`
-automatically tests the format). The output of `gff2bed` is in the 12-column
-BED format, or the BED12 format. With the `--junc-bed` option, minimap2 adds a
-bonus score (tuned by `--junc-bonus`) if an aligned junction matches a junction
-in the annotation. Option `--junc-bed` also takes 5-column BED, including the
-strand field. In this case, each line indicates an oriented junction.
-
-#### <a name="long-overlap"></a>Find overlaps between long reads
-
-```sh
-minimap2 -x ava-pb  reads.fq reads.fq > ovlp.paf    # PacBio CLR read overlap
-minimap2 -x ava-ont reads.fq reads.fq > ovlp.paf    # Oxford Nanopore read overlap
-```
-Similarly, `ava-pb` uses HPC minimizers while `ava-ont` uses ordinary
-minimizers. It is usually not recommended to perform base-level alignment in
-the overlapping mode because it is slow and may produce false positive
-overlaps. However, if performance is not a concern, you may try to add `-a` or
-`-c` anyway.
-
-#### <a name="short-genomic"></a>Map short accurate genomic reads
-
-```sh
-minimap2 -ax sr ref.fa reads-se.fq > aln.sam           # single-end alignment
-minimap2 -ax sr ref.fa read1.fq read2.fq > aln.sam     # paired-end alignment
-minimap2 -ax sr ref.fa reads-interleaved.fq > aln.sam  # paired-end alignment
-```
-When two read files are specified, minimap2 reads from each file in turn and
-merge them into an interleaved stream internally. Two reads are considered to
-be paired if they are adjacent in the input stream and have the same name (with
-the `/[0-9]` suffix trimmed if present). Single- and paired-end reads can be
-mixed.
-
-Minimap2 does not work well with short spliced reads. There are many capable
-RNA-seq mappers for short reads.
-
-#### <a name="full-genome"></a>Full genome/assembly alignment
-
-```sh
-minimap2 -ax asm5 ref.fa asm.fa > aln.sam       # assembly to assembly/ref alignment
-```
-For cross-species full-genome alignment, the scoring system needs to be tuned
-according to the sequence divergence.
-
-### <a name="advanced"></a>Advanced features
-
-#### <a name="long-cigar"></a>Working with >65535 CIGAR operations
-
-Due to a design flaw, BAM does not work with CIGAR strings with >65535
-operations (SAM and CRAM work). However, for ultra-long nanopore reads minimap2
-may align ~1% of read bases with long CIGARs beyond the capability of BAM. If
-you convert such SAM/CRAM to BAM, Picard and recent samtools will throw an
-error and abort. Older samtools and other tools may create corrupted BAM.
-
-To avoid this issue, you can add option `-L` at the minimap2 command line.
-This option moves a long CIGAR to the `CG` tag and leaves a fully clipped CIGAR
-at the SAM CIGAR column. Current tools that don't read CIGAR (e.g. merging and
-sorting) still work with such BAM records; tools that read CIGAR will
-effectively ignore these records. It has been decided that future tools
-will seamlessly recognize long-cigar records generated by option `-L`.
-
-**TL;DR**: if you work with ultra-long reads and use tools that only process
-BAM files, please add option `-L`.
-
-#### <a name="cs"></a>The cs optional tag
-
-The `cs` SAM/PAF tag encodes bases at mismatches and INDELs. It matches regular
-expression `/(:[0-9]+|\*[a-z][a-z]|[=\+\-][A-Za-z]+)+/`. Like CIGAR, `cs`
-consists of series of operations.  Each leading character specifies the
-operation; the following sequence is the one involved in the operation.
-
-The `cs` tag is enabled by command line option `--cs`. The following alignment,
-for example:
-```txt
-CGATCGATAAATAGAGTAG---GAATAGCA
-||||||   ||||||||||   |||| |||
-CGATCG---AATAGAGTAGGTCGAATtGCA
-```
-is represented as `:6-ata:10+gtc:4*at:3`, where `:[0-9]+` represents an
-identical block, `-ata` represents a deletion, `+gtc` an insertion and `*at`
-indicates reference base `a` is substituted with a query base `t`. It is
-similar to the `MD` SAM tag but is standalone and easier to parse.
-
-If `--cs=long` is used, the `cs` string also contains identical sequences in
-the alignment. The above example will become
-`=CGATCG-ata=AATAGAGTAG+gtc=GAAT*at=GCA`. The long form of `cs` encodes both
-reference and query sequences in one string. The `cs` tag also encodes intron
-positions and splicing signals (see the [minimap2 manpage][manpage-cs] for
-details).
-
-#### <a name="paftools"></a>Working with the PAF format
-
-Minimap2 also comes with a (java)script [paftools.js](misc/paftools.js) that
-processes alignments in the PAF format. It calls variants from
-assembly-to-reference alignment, lifts over BED files based on alignment,
-converts between formats and provides utilities for various evaluations. For
-details, please see [misc/README.md](misc/README.md).
-
-### <a name="algo"></a>Algorithm overview
-
-In the following, minimap2 command line options have a dash ahead and are
-highlighted in bold. The description may help to tune minimap2 parameters.
-
-1. Read **-I** [=*4G*] reference bases, extract (**-k**,**-w**)-minimizers and
-   index them in a hash table.
-
-2. Read **-K** [=*200M*] query bases. For each query sequence, do step 3
-   through 7:
-
-3. For each (**-k**,**-w**)-minimizer on the query, check against the reference
-   index. If a reference minimizer is not among the top **-f** [=*2e-4*] most
-   frequent, collect its the occurrences in the reference, which are called
-   *seeds*.
-
-4. Sort seeds by position in the reference. Chain them with dynamic
-   programming. Each chain represents a potential mapping. For read
-   overlapping, report all chains and then go to step 8. For reference mapping,
-   do step 5 through 7:
-
-5. Let *P* be the set of primary mappings, which is an empty set initially. For
-   each chain from the best to the worst according to their chaining scores: if
-   on the query, the chain overlaps with a chain in *P* by **--mask-level**
-   [=*0.5*] or higher fraction of the shorter chain, mark the chain as
-   *secondary* to the chain in *P*; otherwise, add the chain to *P*.
-
-6. Retain all primary mappings. Also retain up to **-N** [=*5*] top secondary
-   mappings if their chaining scores are higher than **-p** [=*0.8*] of their
-   corresponding primary mappings.
-
-7. If alignment is requested, filter out an internal seed if it potentially
-   leads to both a long insertion and a long deletion. Extend from the
-   left-most seed. Perform global alignments between internal seeds.  Split the
-   chain if the accumulative score along the global alignment drops by **-z**
-   [=*400*], disregarding long gaps. Extend from the right-most seed.  Output
-   chains and their alignments.
-
-8. If there are more query sequences in the input, go to step 2 until no more
-   queries are left.
-
-9. If there are more reference sequences, reopen the query file from the start
-   and go to step 1; otherwise stop.
-
-### <a name="help"></a>Getting help
-
-Manpage [minimap2.1][manpage] provides detailed description of minimap2
-command line options and optional tags. The [FAQ](FAQ.md) page answers several
-frequently asked questions. If you encounter bugs or have further questions or
-requests, you can raise an issue at the [issue page][issue].  There is not a
-specific mailing list for the time being.
-
-### <a name="cite"></a>Citing minimap2
-
-If you use minimap2 in your work, please cite:
-
-> Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences.
-> *Bioinformatics*, **34**:3094-3100. [doi:10.1093/bioinformatics/bty191][doi]
-
-## <a name="dguide"></a>Developers' Guide
-
-Minimap2 is not only a command line tool, but also a programming library.
-It provides C APIs to build/load index and to align sequences against the
-index. File [example.c](example.c) demonstrates typical uses of C APIs. Header
-file [minimap.h](minimap.h) gives more detailed API documentation. Minimap2
-aims to keep APIs in this header stable. File [mmpriv.h](mmpriv.h) contains
-additional private APIs which may be subjected to changes frequently.
-
-This repository also provides Python bindings to a subset of C APIs. File
-[python/README.rst](python/README.rst) gives the full documentation;
-[python/minimap2.py](python/minimap2.py) shows an example. This Python
-extension, mappy, is also [available from PyPI][mappypypi] via `pip install
-mappy` or [from BioConda][mappyconda] via `conda install -c bioconda mappy`.
-
-## <a name="limit"></a>Limitations
-
-* Minimap2 may produce suboptimal alignments through long low-complexity
-  regions where seed positions may be suboptimal. This should not be a big
-  concern because even the optimal alignment may be wrong in such regions.
-
-* Minimap2 requires SSE2 instructions on x86 CPUs or NEON on ARM CPUs. It is
-  possible to add non-SIMD support, but it would make minimap2 slower by
-  several times.
-
-* Minimap2 does not work with a single query or database sequence ~2
-  billion bases or longer (2,147,483,647 to be exact). The total length of all
-  sequences can well exceed this threshold.
-
-* Minimap2 often misses small exons.
-
 
+## Pre-built indexes
+This [link](https://nextcloud.ispras.ru/index.php/s/wcb9PpZyr8Gb5CC) contains pre-built modified indexes for next references:
+* GRCh38 [(GCA_000001405.15)](https://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/release/references/GRCh38/)
+* GRCh37 [(hs37d5)](https://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/release/references/GRCh37/)
 
-[paf]: https://github.com/lh3/miniasm/blob/master/PAF.md
-[sam]: https://samtools.github.io/hts-specs/SAMv1.pdf
-[minimap]: https://github.com/lh3/minimap
-[smartdenovo]: https://github.com/ruanjue/smartdenovo
-[longislnd]: https://www.ncbi.nlm.nih.gov/pubmed/27667791
-[gaba]: https://github.com/ocxtal/libgaba
-[ksw2]: https://github.com/lh3/ksw2
-[preprint]: https://arxiv.org/abs/1708.01492
-[release]: https://github.com/lh3/minimap2/releases
-[mappypypi]: https://pypi.python.org/pypi/mappy
-[mappyconda]: https://anaconda.org/bioconda/mappy
-[issue]: https://github.com/lh3/minimap2/issues
-[k8]: https://github.com/attractivechaos/k8
-[manpage]: https://lh3.github.io/minimap2/minimap2.html
-[manpage-cs]: https://lh3.github.io/minimap2/minimap2.html#10
-[doi]: https://doi.org/10.1093/bioinformatics/bty191
-[smide]: https://github.com/nemequ/simde
-[unimap]: https://github.com/lh3/unimap
+## Tests
+See [test/tests.md](test/tests.md) for more details.
diff --git a/index.c b/index.c
index 0d8a2ae..d7133e4 100644
--- a/index.c
+++ b/index.c
@@ -15,6 +15,12 @@
 #include "kvec.h"
 #include "khash.h"
 
+#include <htslib/hts.h>
+#include <htslib/vcf.h>
+#include <htslib/tbx.h>
+#include <htslib/kstring.h>
+#include <htslib/kseq.h>
+
 #define idx_hash(a) ((a)>>1)
 #define idx_eq(a, b) ((a)>>1 == (b)>>1)
 KHASH_INIT(idx, uint64_t, uint64_t, 1, idx_hash, idx_eq)
@@ -263,7 +269,7 @@ static void worker_post(void *g, long i, int tid)
 	kfree(0, b->a.a);
 	b->a.n = b->a.m = 0, b->a.a = 0;
 }
- 
+
 static void mm_idx_post(mm_idx_t *mi, int n_threads)
 {
 	kt_for(n_threads, worker_post, mi, 1<<mi->b);
@@ -282,6 +288,7 @@ typedef struct {
 	uint64_t batch_size, sum_len;
 	mm_bseq_file_t *fp;
 	mm_idx_t *mi;
+	char * vcf_with_variants;
 } pipeline_t;
 
 typedef struct {
@@ -355,15 +362,68 @@ static void *worker_pipeline(void *shared, int step, void *in)
 		} else free(s);
     } else if (step == 1) { // step 1: compute sketch
         step_t *s = (step_t*)in;
+
 		for (i = 0; i < s->n_seq; ++i) {
+			//printf("SEQ %d %s\n", s->n_seq, s->seq[i].name);
 			mm_bseq1_t *t = &s->seq[i];
 			if (t->l_seq > 0)
 				mm_sketch(0, t->seq, t->l_seq, p->mi->w, p->mi->k, t->rid, p->mi->flag&MM_I_HPC, &s->a);
 			else if (mm_verbose >= 2)
 				fprintf(stderr, "[WARNING] the length database sequence '%s' is 0\n", t->name);
+
+			if(p->vcf_with_variants && strcmp(p->vcf_with_variants, "")) {
+				mm_idx_manipulate_phased(p->mi, p->vcf_with_variants, &s->a, s->seq[i].name);
+			}
+
 			free(t->seq); free(t->name);
 		}
+
 		free(s->seq); s->seq = 0;
+
+		// sort by minimizer
+		radix_sort_128x(s->a.a, s->a.a + s->a.n);
+
+        // stay only unique
+		//#### why 16, sizeof(mm128_v)?
+        mm128_t *tmp = (mm128_t*)calloc(s->a.n, 16);
+
+
+		size_t ptr_p, ptr_tmp, beg_tmp, end_tmp;
+        tmp[0].x = s->a.a[0].x;
+        tmp[0].y = s->a.a[0].y;
+		beg_tmp = 0;
+		end_tmp = 1;
+
+		for (ptr_p = 1; ptr_p < s->a.n; ptr_p++) {
+			if(s->a.a[ptr_p].x == tmp[beg_tmp].x) {
+				int exist = 0;
+				for (ptr_tmp = beg_tmp; ptr_tmp < end_tmp; ptr_tmp++) {
+					if (s->a.a[ptr_p].y == tmp[ptr_tmp].y && s->a.a[ptr_p].x == tmp[ptr_tmp].x) {
+						exist = 1;
+						break;
+					}
+				}
+				if (exist == 0) {
+					tmp[end_tmp].x = s->a.a[ptr_p].x;
+					tmp[end_tmp].y = s->a.a[ptr_p].y;
+					end_tmp++;
+				}
+			}
+			else {
+				tmp[end_tmp].x = s->a.a[ptr_p].x;
+				tmp[end_tmp].y = s->a.a[ptr_p].y;
+				beg_tmp = end_tmp;
+				end_tmp++;
+			}
+		}
+
+        tmp = (mm128_t *) realloc(tmp, sizeof(mm128_t) * end_tmp);
+
+		//#### do we need realloc s->a.a ?
+        memcpy(s->a.a, tmp, end_tmp * 16);
+        free(tmp);
+        s->a.n = end_tmp;
+
 		return s;
     } else if (step == 2) { // dispatch sketch to buckets
         step_t *s = (step_t*)in;
@@ -373,7 +433,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
     return 0;
 }
 
-mm_idx_t *mm_idx_gen(mm_bseq_file_t *fp, int w, int k, int b, int flag, int mini_batch_size, int n_threads, uint64_t batch_size)
+mm_idx_t *mm_idx_gen(mm_bseq_file_t *fp, int w, int k, int b, int flag, int mini_batch_size, int n_threads, uint64_t batch_size, char * vcf_with_variants)
 {
 	pipeline_t pl;
 	if (fp == 0 || mm_bseq_eof(fp)) return 0;
@@ -382,8 +442,10 @@ mm_idx_t *mm_idx_gen(mm_bseq_file_t *fp, int w, int k, int b, int flag, int mini
 	pl.batch_size = batch_size;
 	pl.fp = fp;
 	pl.mi = mm_idx_init(w, k, b, flag);
+	pl.vcf_with_variants = vcf_with_variants;
 
 	kt_pipeline(n_threads < 3? n_threads : 3, worker_pipeline, &pl, 3);
+
 	if (mm_verbose >= 3)
 		fprintf(stderr, "[M::%s::%.3f*%.2f] collected minimizers\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0));
 
@@ -394,13 +456,13 @@ mm_idx_t *mm_idx_gen(mm_bseq_file_t *fp, int w, int k, int b, int flag, int mini
 	return pl.mi;
 }
 
-mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads) // a simpler interface; deprecated
+mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads, char * vcf_with_variants) // a simpler interface; deprecated
 {
 	mm_bseq_file_t *fp;
 	mm_idx_t *mi;
 	fp = mm_bseq_open(fn);
 	if (fp == 0) return 0;
-	mi = mm_idx_gen(fp, w, k, 14, flag, 1<<18, n_threads, UINT64_MAX);
+	mi = mm_idx_gen(fp, w, k, 14, flag, 1<<18, n_threads, UINT64_MAX, vcf_with_variants);
 	mm_bseq_close(fp);
 	return mi;
 }
@@ -459,6 +521,51 @@ mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const cha
  * index I/O *
  *************/
 
+void mm_idx_to_txt(FILE *fp, const mm_idx_t *mi)
+{
+    uint64_t sum_len = 0;
+    uint32_t x[5], i;
+    x[0] = mi->w, x[1] = mi->k, x[2] = mi->b, x[3] = mi->n_seq, x[4] = mi->flag;
+    fprintf(fp, "%s\n", MM_IDX_MAGIC);
+    fprintf(fp, "%u %u %u %u %u\n", x[0], x[1], x[2], x[3], x[4]);
+
+    for (i = 0; i < mi->n_seq; ++i) {
+        if (mi->seq[i].name) {
+            uint8_t l = strlen(mi->seq[i].name);
+            fprintf(fp, "%u\n", l);
+            fprintf(fp, "%s\n", mi->seq[i].name);
+        } else {
+            fprintf(fp, "0\n");
+        }
+        fprintf(fp, "%u\n", mi->seq[i].len);
+        sum_len += mi->seq[i].len;
+    }
+    for (i = 0; i < 1<<mi->b; ++i) {
+        mm_idx_bucket_t *b = &mi->B[i];
+        khint_t k;
+        idxhash_t *h = (idxhash_t*)b->h;
+        uint32_t size = h? h->size : 0;
+        fprintf(fp, "%i\n", b->n);
+        for (int j = 0; j < b->n; j++)
+            fprintf(fp, "\t%lu\n", b->p[j]);
+        fprintf(fp, "%u\n", size);
+        if (size == 0)
+            continue;
+        for (k = 0; k < kh_end(h); ++k) {
+            uint64_t x[3];
+            if (!kh_exist(h, k))
+                continue;
+            x[0] = kh_key(h, k), x[1] = kh_val(h, k);
+            fprintf(fp, "%lu\t%lu\n", x[0], x[1]);
+        }
+    }
+    if (!(mi->flag & MM_I_NO_SEQ)) {
+        for (int i = 0; i < (sum_len + 7) / 8; i++)
+            fprintf(fp, "%u\n", mi->S[i]);
+	}
+	fflush(fp);
+}
+
 void mm_idx_dump(FILE *fp, const mm_idx_t *mi)
 {
 	uint64_t sum_len = 0;
@@ -500,6 +607,67 @@ void mm_idx_dump(FILE *fp, const mm_idx_t *mi)
 	fflush(fp);
 }
 
+mm_idx_t *mm_idx_load_from_txt(FILE *fp)
+{
+	char magic[4];
+	uint32_t x[5], i;
+	uint64_t sum_len = 0;
+	mm_idx_t *mi;
+
+    fscanf(fp, "%s\n", &magic);
+
+	if (strncmp(magic, MM_IDX_MAGIC, 4) != 0) return 0;
+    fscanf(fp, "%u %u %u %u %u\n", &x[0], &x[1], &x[2], &x[3], &x[4]);
+
+	mi = mm_idx_init(x[0], x[1], x[2], x[4]);
+	mi->n_seq = x[3];
+	mi->seq = (mm_idx_seq_t*)kcalloc(mi->km, mi->n_seq, sizeof(mm_idx_seq_t));
+	for (i = 0; i < mi->n_seq; ++i) {
+		uint8_t l;
+		mm_idx_seq_t *s = &mi->seq[i];
+        fscanf(fp, "%u\n", &l);
+		if (l) {
+			s->name = (char*)kmalloc(mi->km, l + 1);
+            fscanf(fp, "%s\n", s->name);
+			s->name[l] = 0;
+		}
+        fscanf(fp, "%u\n", &s->len);
+		s->offset = sum_len;
+		s->is_alt = 0;
+		sum_len += s->len;
+	}
+	for (i = 0; i < 1<<mi->b; ++i) {
+		mm_idx_bucket_t *b = &mi->B[i];
+		uint32_t j, size;
+		khint_t k;
+		idxhash_t *h;
+        fscanf(fp, "%i\n", &b->n);
+		b->p = (uint64_t*)malloc(b->n * 8);
+        for (int j = 0; j < b->n; j++) {
+            fscanf(fp, "\t%lu\n", &b->p[j]);
+		}
+        fscanf(fp, "%u\n", &size);
+		if (size == 0) continue;
+		b->h = h = kh_init(idx);
+		kh_resize(idx, h, size);
+		for (j = 0; j < size; ++j) {
+			uint64_t x[2];
+			int absent;
+            fscanf(fp, "%lu\t%lu\n", &x[0], &x[1]);
+			k = kh_put(idx, h, x[0], &absent);
+			assert(absent);
+			kh_val(h, k) = x[1];
+		}
+	}
+	if (!(mi->flag & MM_I_NO_SEQ)) {
+		mi->S = (uint32_t*)malloc((sum_len + 7) / 8 * 4);
+        for (int i = 0; i < (sum_len + 7) / 8; i++)
+            fscanf(fp, "%u\n", &mi->S[i]);
+	}
+	return mi;
+}
+
+
 mm_idx_t *mm_idx_load(FILE *fp)
 {
 	char magic[4];
@@ -605,7 +773,7 @@ void mm_idx_reader_close(mm_idx_reader_t *r)
 	free(r);
 }
 
-mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads)
+mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads, char * vcf_with_variants)
 {
 	mm_idx_t *mi;
 	if (r->is_idx) {
@@ -613,9 +781,13 @@ mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads)
 		if (mi && mm_verbose >= 2 && (mi->k != r->opt.k || mi->w != r->opt.w || (mi->flag&MM_I_HPC) != (r->opt.flag&MM_I_HPC)))
 			fprintf(stderr, "[WARNING]\033[1;31m Indexing parameters (-k, -w or -H) overridden by parameters used in the prebuilt index.\033[0m\n");
 	} else
-		mi = mm_idx_gen(r->fp.seq, r->opt.w, r->opt.k, r->opt.bucket_bits, r->opt.flag, r->opt.mini_batch_size, n_threads, r->opt.batch_size);
+	{
+		mi = mm_idx_gen(r->fp.seq, r->opt.w, r->opt.k, r->opt.bucket_bits, r->opt.flag, r->opt.mini_batch_size, n_threads, r->opt.batch_size, vcf_with_variants);
+	}
 	if (mi) {
-		if (r->fp_out) mm_idx_dump(r->fp_out, mi);
+		if (r->fp_out) {
+			mm_idx_dump(r->fp_out, mi);
+		}
 		mi->index = r->n_parts++;
 	}
 	return mi;
diff --git a/linked_vcf_list.c b/linked_vcf_list.c
new file mode 100644
index 0000000..2cfc550
--- /dev/null
+++ b/linked_vcf_list.c
@@ -0,0 +1,323 @@
+// Copyright 2023 karpulevich
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include <htslib/vcf.h>
+#include "mmpriv.h"
+#include "minimap.h"
+#include "linked_vcf_list.h"
+
+struct node *head = NULL;
+struct node *current = NULL;
+
+// display the list
+void printList(){
+    struct node *p = head;
+    printf("\n[");
+
+    //start from the beginning
+    while(p != NULL) {
+        printf(" %lu ",p->pos);
+        p = p->next;
+    }
+    printf("]");
+}
+
+// display the GT list
+void printGTList(bcf_hdr_t *hdr){
+    struct node *p = head;
+
+
+    //start from the beginning
+    while(p != NULL) {
+        int32_t *gt_arr = NULL, ngt_arr = 0;
+        int ngt;
+        ngt = bcf_get_genotypes(hdr, p->rec, &gt_arr, &ngt_arr);
+        if ( ngt > 0 ) {
+            int i, nsmpl = bcf_hdr_nsamples(hdr);
+            printf("%s %lu REF:%s ALT:%s\n", bcf_hdr_id2name(hdr, p->CHR_ID), p->pos, p->REF, p->ALT);
+
+            int max_ploidy = ngt/nsmpl;
+            for (i=0; i<nsmpl; i++)
+            {
+                int32_t *ptr = gt_arr + i*max_ploidy;
+                printf("%d|%d\t",bcf_gt_allele(ptr[0]), bcf_gt_allele(ptr[1]));
+            }
+            printf("\n");
+            free(gt_arr);
+        }
+
+        p = p->next;
+    }
+}
+
+int ifexists(char* z, const int u, const int s, char* v)
+{
+    for (int i = 0; i < u; i++)
+        if (!strncmp(&z[i * s], v, s)) {
+            return 1;
+        }
+    return 0;
+}
+
+void calculate_haplotypes(mm_idx_t * mi, bcf_hdr_t *hdr, struct node *window_start_pointer, struct node *current_pointer, unsigned long curr_pos, mm128_v *p) {
+    //Even though vcf uses 1-based indexing (i.e. first base is base 1), htslib internally uses 0-based indexing (i.e. bcf1_t::pos is 0 based).
+    //http://wresch.github.io/2014/11/18/process-vcf-file-with-htslib.html
+    //so we need use (pos + 1)
+    struct node *c_pointer = current_pointer;
+    struct node *w_start_pointer = window_start_pointer;
+
+    int MAX_SNP = 10;
+
+    char *gt_array = NULL;
+    int arr_i, arr_j, arr_len;
+
+    if (!(mi->flag & MM_PARSE_HT)) {
+        int snp_num = 0;
+        while (w_start_pointer != c_pointer) {
+            w_start_pointer = w_start_pointer->next;
+            snp_num += 1;
+        }
+        if (snp_num == MAX_SNP) {
+            return;
+        }
+        arr_i = (int) pow(2, snp_num);
+        arr_j = snp_num + 1;
+        gt_array = (char *) calloc(arr_i * arr_j, sizeof(char));
+        for (unsigned i = 0; i < (unsigned) pow(2, snp_num); i++) {
+            int k = 0;
+            for (int j = 1 << (snp_num - 1); j > 0; j = j / 2) {
+                gt_array[i * arr_j + k] = (i & j) ? '1' : '0';
+                k++;
+            }
+        }
+        arr_len = arr_i;
+    }
+    else {
+        MAX_SNP = 100;
+        int sample_num = bcf_hdr_nsamples(hdr);
+        int snp_num = 0;
+        arr_i = sample_num * 2;
+        arr_j = MAX_SNP + 1;
+        gt_array = (char *) calloc(arr_i * arr_j, sizeof(char));
+        while (w_start_pointer != c_pointer) {
+            int32_t *gt_arr = NULL, ngt_arr = 0;
+            int ngt = bcf_get_genotypes(hdr, w_start_pointer->rec, &gt_arr, &ngt_arr);
+
+            if (ngt > 0) {
+                int max_ploidy = ngt / sample_num;
+
+                for (int i = 0; i < sample_num; i++) {
+                    int32_t *ptr = gt_arr + i * max_ploidy;
+                    gt_array[i * max_ploidy * arr_j + snp_num] = bcf_gt_allele(ptr[0]) + '0';
+                    gt_array[(i * max_ploidy + 1) * arr_j + snp_num] = bcf_gt_allele(ptr[1]) + '0';
+                }
+                free(gt_arr);
+            }
+
+            w_start_pointer = w_start_pointer->next;
+            snp_num += 1;
+            if (snp_num > MAX_SNP) {
+                free(gt_array);
+                return;
+            }
+        }
+
+        //remove non unique strings
+        //https://subscription.packtpub.com/book/programming/9781838641108/1/ch01lvl1sec06/finding-the-unique-elements-in-an-array
+        int k = 1;
+        for (int i = 1; i < sample_num * 2; i++) {
+            if (!ifexists(gt_array, k, arr_j, &gt_array[i * arr_j])) {
+                if (i != k) memcpy(&gt_array[k * arr_j], &gt_array[i * arr_j], arr_j * sizeof(char));
+                k++;
+            }
+        }
+        arr_len = k;
+    }
+
+    //prepare SNP combinations
+    //Array format:
+    //REF_arr char array - REF (for control)
+    //ALT_arr char array - ALT
+    //POS_all ulong array - positions
+    //CHR - chromosome
+    //N_SNP - length
+    const char * CHR = bcf_hdr_id2name(hdr, window_start_pointer->CHR_ID);
+
+    for(int i = 0; i<arr_len; i++) {
+        struct node *local_c_pointer = current_pointer;
+        struct node *local_w_start_pointer = window_start_pointer;
+        int N_SNP = 0;
+        int local_snp_num = 0;
+
+        char * REF_arr[MAX_SNP];
+        char * ALT_arr[MAX_SNP];
+        unsigned long POS_all[MAX_SNP];
+
+        while (local_w_start_pointer != local_c_pointer) {
+            if(gt_array[i * arr_j + local_snp_num] == '1') {
+                REF_arr[N_SNP] = local_w_start_pointer->REF;
+                ALT_arr[N_SNP] = local_w_start_pointer->ALT;
+                POS_all[N_SNP] = (unsigned long)(local_w_start_pointer->pos + 1);
+                N_SNP += 1;
+            }
+
+            local_snp_num += 1;
+            local_w_start_pointer = local_w_start_pointer->next;
+        }
+        if (N_SNP > 0) {
+            add_variants(mi, CHR, REF_arr, ALT_arr, POS_all, N_SNP, curr_pos + 1, p);
+        }
+    }
+    free(gt_array);
+}
+
+void handleGTList(mm_idx_t * mi, bcf_hdr_t *hdr, mm128_v *p){
+    struct node *window_start_pointer = head;
+    struct node *current_pointer = head;
+    struct node *window_end_pointer = head;
+
+    int gap = mi->k - 1;
+
+    if (head == NULL)
+        return;
+
+    while(window_end_pointer->next != NULL)
+    {
+        while(window_end_pointer != NULL && window_end_pointer->pos > (current_pointer->pos - gap)){
+            window_end_pointer = window_end_pointer->next;
+        }
+
+        while(window_start_pointer->pos > (current_pointer->pos + gap)){
+            window_start_pointer = window_start_pointer->next;
+        }
+
+        //calculate start and end pointers
+        calculate_haplotypes(mi, hdr, window_start_pointer, window_end_pointer, current_pointer->pos, p);
+
+        current_pointer = current_pointer->next;
+        window_end_pointer = current_pointer;
+    }
+
+    //last SNP batch
+    while(window_start_pointer->pos > current_pointer->pos + gap){
+        window_start_pointer = window_start_pointer->next;
+    }
+
+    calculate_haplotypes(mi, hdr, window_start_pointer, window_end_pointer->next, current_pointer->pos, p);
+}
+
+//insertion at the beginning
+void insertatbegin(unsigned long pos, bcf1_t *rec, int CHR_ID, char * REF, char * ALT){
+    //create a link
+    struct node *lk = (struct node*) malloc(sizeof(struct node));
+    lk->pos = pos;
+    lk->rec = rec;
+    lk->CHR_ID = CHR_ID;
+    lk->REF = REF;
+    lk->ALT = ALT;
+
+    // point it to old first node
+    lk->next = head;
+
+    //point first to new first node
+    head = lk;
+}
+
+void insertatend(unsigned long pos, bcf1_t *rec, int CHR_ID, char * REF, char * ALT){
+    //create a link
+    struct node *lk = (struct node*) malloc(sizeof(struct node));
+    lk->pos = pos;
+    lk->rec = rec;
+    lk->CHR_ID = CHR_ID;
+    lk->REF = REF;
+    lk->ALT = ALT;
+
+    struct node *linkedlist = head;
+
+    // point it to old first node
+    while(linkedlist->next != NULL)
+        linkedlist = linkedlist->next;
+
+    //point first to new first node
+    linkedlist->next = lk;
+}
+void insertafternode(struct node *list, unsigned long pos, bcf1_t *rec, int CHR_ID, char * REF, char * ALT){
+    struct node *lk = (struct node*) malloc(sizeof(struct node));
+    lk->pos = pos;
+    lk->rec = rec;
+    lk->CHR_ID = CHR_ID;
+    lk->REF = REF;
+    lk->ALT = ALT;
+
+    lk->next = list->next;
+    list->next = lk;
+}
+
+void deleteatbegin(){
+    struct node *temp = head;
+    head = temp->next;
+    free(temp->ALT);
+    free(temp->REF);
+    bcf_destroy(temp->rec);
+    free(temp);
+}
+
+void deleteList(){
+    while (head != NULL)
+    {
+        deleteatbegin();
+    }
+}
+
+bool isListEmpty(){
+    return head == NULL;
+}
+
+void deletenode(int key){
+    struct node *temp = head, *prev;
+    if (temp != NULL && temp->pos == key) {
+        head = temp->next;
+        free(temp);
+        return;
+    }
+
+    // Find the key to be deleted
+    while (temp != NULL && temp->pos != key) {
+        prev = temp;
+        temp = temp->next;
+    }
+
+    // If the key is not present
+    if (temp == NULL) return;
+
+    // Remove the node
+    prev->next = temp->next;
+    free(temp);
+}
+
+int searchlist(int key){
+    struct node *temp = head;
+    while(temp != NULL) {
+        if (temp->pos == key) {
+            return 1;
+        }
+        temp=temp->next;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/linked_vcf_list.h b/linked_vcf_list.h
new file mode 100644
index 0000000..0504865
--- /dev/null
+++ b/linked_vcf_list.h
@@ -0,0 +1,29 @@
+// Copyright 2023 karpulevich
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdbool.h>
+
+struct node {
+    unsigned long pos;
+    bcf1_t *rec;
+    int CHR_ID;
+    char * REF;
+    char * ALT;
+    struct node *next;
+};
+
+bool isListEmpty();
+void handleGTList(mm_idx_t * mi, bcf_hdr_t *hdr, mm128_v *p);
+void insertatbegin(unsigned long pos, bcf1_t *rec, int CHR_ID, char * REF, char * ALT);
+void deleteList();
diff --git a/main.c b/main.c
index 0be9933..93dbf04 100644
--- a/main.c
+++ b/main.c
@@ -78,6 +78,8 @@ static ko_longopt_t long_options[] = {
 	{ "chain-skip-scale",ko_required_argument,351 },
 	{ "print-chains",   ko_no_argument,       352 },
 	{ "no-hash-name",   ko_no_argument,       353 },
+	{ "vcf-file-with-variants",ko_required_argument,354 },
+    { "parse-haplotype",ko_no_argument,      355 },
 	{ "help",           ko_no_argument,       'h' },
 	{ "max-intron-len", ko_required_argument, 'G' },
 	{ "version",        ko_no_argument,       'V' },
@@ -127,6 +129,7 @@ int main(int argc, char *argv[])
 	mm_idxopt_t ipt;
 	int i, c, n_threads = 3, n_parts, old_best_n = -1;
 	char *fnw = 0, *rg = 0, *junc_bed = 0, *s, *alt_list = 0;
+	char *vcf_with_variants = 0;
 	FILE *fp_help = stderr;
 	mm_idx_reader_t *idx_rdr;
 	mm_idx_t *mi;
@@ -237,6 +240,8 @@ int main(int argc, char *argv[])
 		else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
 		else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
 		else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
+		else if (c == 354) vcf_with_variants = o.arg; // --vcf-file-with-variants
+        else if (c == 355) ipt.flag |= MM_PARSE_HT;  // --parse-haplotype
 		else if (c == 330) {
 			fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
 		} else if (c == 314) { // --frag
@@ -324,6 +329,8 @@ int main(int argc, char *argv[])
 		fprintf(fp_help, "    -w INT       minimizer window size [%d]\n", ipt.w);
 		fprintf(fp_help, "    -I NUM       split index for every ~NUM input bases [4G]\n");
 		fprintf(fp_help, "    -d FILE      dump index to FILE []\n");
+		fprintf(fp_help, "    --vcf-file-with-variants FILE      pass VCF FILE to modify index []\n");
+        fprintf(fp_help, "    --parse-haplotype parse haplotypes from VCF to generate real SNP combinations, otherwise use all.\n");
 		fprintf(fp_help, "  Mapping:\n");
 		fprintf(fp_help, "    -f FLOAT     filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
 		fprintf(fp_help, "    -g NUM       stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
@@ -386,7 +393,7 @@ int main(int argc, char *argv[])
 	}
 	if (opt.best_n == 0 && (opt.flag&MM_F_CIGAR) && mm_verbose >= 2)
 		fprintf(stderr, "[WARNING]\033[1;31m `-N 0' reduces alignment accuracy. Please use --secondary=no to suppress secondary alignments.\033[0m\n");
-	while ((mi = mm_idx_reader_read(idx_rdr, n_threads)) != 0) {
+	while ((mi = mm_idx_reader_read(idx_rdr, n_threads, vcf_with_variants)) != 0) {
 		int ret;
 		if ((opt.flag & MM_F_CIGAR) && (mi->flag & MM_I_NO_SEQ)) {
 			fprintf(stderr, "[ERROR] the prebuilt index doesn't contain sequences.\n");
diff --git a/minimap.h b/minimap.h
index 13e12e0..828de04 100644
--- a/minimap.h
+++ b/minimap.h
@@ -44,6 +44,7 @@
 #define MM_I_HPC          0x1
 #define MM_I_NO_SEQ       0x2
 #define MM_I_NO_NAME      0x4
+#define MM_PARSE_HT       0x8
 
 #define MM_IDX_MAGIC   "MMI\2"
 
@@ -247,7 +248,7 @@ mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt, cons
  *
  * @return an index on success; NULL if reaching the end of the input file
  */
-mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads);
+mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads, char * vcf_with_variants);
 
 /**
  * Destroy/deallocate an index reader
@@ -401,7 +402,7 @@ int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, uin
 
 // deprecated APIs for backward compatibility
 void mm_mapopt_init(mm_mapopt_t *opt);
-mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads);
+mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads, char * vcf_with_variants);
 
 #ifdef __cplusplus
 }
diff --git a/mmpriv.h b/mmpriv.h
index 2f5034b..254e694 100644
--- a/mmpriv.h
+++ b/mmpriv.h
@@ -6,6 +6,12 @@
 #include "bseq.h"
 #include "kseq.h"
 
+#include <htslib/hts.h>
+#include <htslib/vcf.h>
+#include <htslib/tbx.h>
+#include <htslib/kstring.h>
+#include <htslib/kseq.h>
+
 #define MM_PARENT_UNSET   (-1)
 #define MM_PARENT_TMP_PRI (-2)
 
@@ -60,6 +66,10 @@ void radix_sort_64(uint64_t *beg, uint64_t *end);
 uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);
 
 void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p);
+void mm_idx_manipulate(mm_idx_t * mi, char *vcf_with_variants);
+void mm_idx_manipulate_phased(mm_idx_t * mi, char * fname, mm128_v *p, char * contig_name);
+void add_variants(mm_idx_t * mi, const char * CHR, char ** REF_arr, char ** ALT_arr, unsigned long * POS_all, int N_SNP, unsigned long curr_pos, mm128_v *p);
+void add_indel(mm_idx_t * mi, const char * CHR, char * REF, char * ALT, unsigned long curr_pos, unsigned long indel_pos, mm128_v *p, const char * original_ref_seq);
 
 mm_seed_t *mm_collect_matches(void *km, int *_n_m, int qlen, int max_occ, int max_max_occ, int dist, const mm_idx_t *mi, const mm128_v *mv, int64_t *n_a, int *rep_len, int *n_mini_pos, uint64_t **mini_pos);
 void mm_seed_mz_flt(void *km, mm128_v *mv, int32_t q_occ_max, float q_occ_frac);
diff --git a/sketch.c b/sketch.c
index f830693..2d898a8 100644
--- a/sketch.c
+++ b/sketch.c
@@ -6,6 +6,14 @@
 #include "kvec.h"
 #include "mmpriv.h"
 
+#include "linked_vcf_list.h"
+
+#include <htslib/hts.h>
+#include <htslib/vcf.h>
+#include <htslib/tbx.h>
+#include <htslib/kstring.h>
+#include <htslib/kseq.h>
+
 unsigned char seq_nt4_table[256] = {
 	0, 1, 2, 3,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
 	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
@@ -81,7 +89,7 @@ void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, i
 	mm128_t buf[256], min = { UINT64_MAX, UINT64_MAX };
 	tiny_queue_t tq;
 
-	assert(len > 0 && (w > 0 && w < 256) && (k > 0 && k <= 28)); // 56 bits for k-mer; could use long k-mers, but 28 enough in practice
+	assert(len > 0 && (w > 0 && w < 256) && (k > 0 && k <= 128)); // 56 bits for k-mer; could use long k-mers, but 28 enough in practice
 	memset(buf, 0xff, w * 16);
 	memset(&tq, 0, sizeof(tiny_queue_t));
 	kv_resize(mm128_t, km, *p, p->n + len/w);
@@ -141,3 +149,364 @@ void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, i
 	if (min.x != UINT64_MAX)
 		kv_push(mm128_t, km, *p, min);
 }
+
+void read_vcf(mm_idx_t * mi, char * fname, mm128_v *p, char * contig_name)
+{
+    int ret;
+
+    kstring_t str = {0,0,0};
+
+    //open vcf file
+    htsFile *fp    = hts_open(fname,"rb");
+
+    //read header
+    bcf_hdr_t *hdr = bcf_hdr_read(fp);
+    bcf1_t *rec    = bcf_init();
+
+    tbx_t *idx = tbx_index_load(fname);
+
+    if(!idx) {
+        //printf("Null index\n");
+        return;
+    }
+
+    //printf("%s\n", contig_name);
+    hts_itr_t *itr = tbx_itr_querys(idx, contig_name);
+
+    if(!itr) {
+        //printf("Null iterator for contig_name %s\n", contig_name);
+        return;
+    }
+
+    while ((ret = tbx_itr_next(fp, idx, itr, &str)) > 0) {
+        vcf_parse(&str, hdr, rec);
+        bcf_unpack(rec, BCF_UN_STR);
+        bcf_unpack(rec, BCF_UN_INFO);
+
+        bcf1_t *rec_tmp = bcf_dup(rec);
+        char * REF = (char *)calloc(mi->k + 1, sizeof(char));
+        strncpy(REF, rec->d.allele[0], mi->k + 1);
+        REF[mi->k + 1] = '\0';
+        char * ALT = (char *)calloc(mi->k + 1, sizeof(char));
+        strncpy(ALT, rec->d.allele[1], mi->k + 1);
+        ALT[mi->k + 1] = '\0';
+
+        insertatbegin((unsigned long)rec_tmp->pos, rec_tmp, rec_tmp->rid, REF, ALT);
+        bcf_empty(rec);
+
+    }
+
+    if(!isListEmpty()){
+        //printf("CHR = %s;\n", contig_name);
+        handleGTList(mi, hdr, p);
+
+        deleteList();
+    }
+
+    bcf_itr_destroy(itr);
+    tbx_destroy(idx);
+    bcf_hdr_destroy(hdr);
+
+    if ( (ret=hts_close(fp)) )
+    {
+        fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret);
+        exit(ret);
+    }
+}
+
+void mm_idx_manipulate_phased(mm_idx_t * mi, char * fname, mm128_v *p, char * contig_name) {
+    read_vcf(mi, fname, p, contig_name);
+}
+
+//REF - REF (for control)
+//ALT - ALT
+//curr_pos ulong - position
+//CHR - chromosome
+void add_indel(mm_idx_t * mi, const char * CHR, char * REF, char * ALT, unsigned long curr_pos, unsigned long indel_pos, mm128_v *p, const char * original_ref_seq)
+{
+    const char *contig_name = CHR;
+    const unsigned long center_position = curr_pos;
+    //const unsigned long position = indel_pos;
+    const unsigned long position = curr_pos;
+
+    //Find seq
+    uint64_t contig_offset = 0;
+    int seq_num = -1;
+    for (int i = 0; i < mi->n_seq; i++) {
+        if (strcmp(contig_name, mi->seq[i].name) == 0) {
+            contig_offset = mi->seq[i].offset;
+            seq_num = i;
+        }
+    }
+    // Error if no contigs in fasta
+    if(seq_num == -1) {
+        printf("ERROR Contig %s id not found in reference\n", contig_name);
+        return;
+    }
+
+    int SIDE_SIZE = (mi->k - 1) + mi->w;
+    // Calculate number of chunks:
+    // side chunks: take k-mer size, subtract 1 and add window size
+    // divided by chunk size and multiplied by 2 as it has 2 sides, and one for center
+    int SEQ_CHUNK_NUMBER = SIDE_SIZE / 8 * 2 + 1;
+    // add extra two side chunks if (mi->k - 1 + 10) is not a multiple of 8
+    int EXTRA_GAP = (8 - SIDE_SIZE % 8) % 8;
+    SEQ_CHUNK_NUMBER = (EXTRA_GAP) ? SEQ_CHUNK_NUMBER + 2 : SEQ_CHUNK_NUMBER;
+
+    int ref_len = strlen(REF);
+    int alt_len = strlen(ALT);
+
+    if(ref_len == 1 && alt_len > 1 && alt_len < mi->k) {
+        char * new_ref_seq;
+        new_ref_seq = (char*)malloc(sizeof(char) * (SEQ_CHUNK_NUMBER * 8 + 1 + (alt_len - 1)));
+        memcpy(new_ref_seq, original_ref_seq, SEQ_CHUNK_NUMBER * 8 + 1);
+
+        memcpy(new_ref_seq, original_ref_seq, EXTRA_GAP + SIDE_SIZE + (contig_offset + position - 1) % 8);
+
+        new_ref_seq[EXTRA_GAP + SIDE_SIZE + (contig_offset + position - 1) % 8] = '\0';
+        new_ref_seq = strcat(new_ref_seq, ALT);
+        new_ref_seq = strcat(new_ref_seq, &original_ref_seq[EXTRA_GAP + SIDE_SIZE + (contig_offset + position - 1) % 8 + 1]);
+
+        //Finds minimizer in window
+        mm128_v minimizer_array = {0, 0, 0};
+                mm_sketch(0, &new_ref_seq[EXTRA_GAP + (contig_offset + center_position - 1) % 8], SIDE_SIZE * 2 + 1 + (alt_len - 1), mi->w, mi->k,
+                        0, mi->flag & MM_I_HPC, &minimizer_array);
+
+        for (int i = 0; i < minimizer_array.n; i++) {
+            if (minimizer_array.a[i].y < SIDE_SIZE * 2) continue;
+            if (minimizer_array.a[i].y > (SIDE_SIZE + mi->k) * 2 - 1 + (alt_len - 1) * 2) continue;
+            minimizer_array.a[i].y = ((uint64_t)seq_num << 32) + (center_position - SIDE_SIZE - 1 + minimizer_array.a[i].y / 2) * 2 +
+                                        (minimizer_array.a[i].y % 2);
+
+            kv_push(mm128_t, 0, *p, minimizer_array.a[i]);
+        }
+    } else if (ref_len > 1 && alt_len == 1 && ref_len < mi->k) {
+        int EXT_CHUNK_COUNT = (ref_len - 2) / 8 + 1;
+        char * original_ref_seq_ext = (char*)malloc(sizeof(char) * (8 * EXT_CHUNK_COUNT + 1));
+        original_ref_seq_ext[8 * EXT_CHUNK_COUNT] = '\0';
+        for (int i = 0; i < EXT_CHUNK_COUNT; i++) {
+            uint32_t tmp_seq;
+            for (int i = 0; i < EXT_CHUNK_COUNT; i++) {
+                if (
+                    // Out of bounds
+                        (contig_offset == 0 && (position - 1) / 8 + (SEQ_CHUNK_NUMBER / 2) + i + 1 < 0) ||
+                        ((contig_offset + position - 1) / 8 + (SEQ_CHUNK_NUMBER / 2) + i + 1) * 8 >= contig_offset + mi->seq[seq_num].len ||
+                        ((contig_offset + position - 1) / 8 + (SEQ_CHUNK_NUMBER / 2) + i + 2) * 8 <= contig_offset
+                        )
+                    tmp_seq = 1145324612; // ALL N
+                else {
+                    tmp_seq = mi->S[(contig_offset + position - 1) / 8 + (SEQ_CHUNK_NUMBER / 2) + i + 1];
+                    // At left bound
+                    if (((contig_offset + position - 1) / 8 + (SEQ_CHUNK_NUMBER / 2) + i + 1) * 8 < contig_offset &&
+                        ((contig_offset + position - 1) / 8 + (SEQ_CHUNK_NUMBER / 2) + i + 2) * 8 > contig_offset) {
+                        if (contig_offset % 8 == 0)
+                            tmp_seq = 1145324612; // ALL N
+                        else {
+                            tmp_seq = tmp_seq >> (4 * (contig_offset % 8));
+                            for (int j = 0; j < contig_offset % 8; j++)
+                                tmp_seq = (tmp_seq << 4) + 4;
+                        }
+                    }
+                    // At right bound
+                    if (((contig_offset + position - 1) / 8 + (SEQ_CHUNK_NUMBER / 2) + i + 1) * 8 < contig_offset + mi->seq[seq_num].len &&
+                        ((contig_offset + position - 1) / 8 + (SEQ_CHUNK_NUMBER / 2) + i + 2) * 8 > contig_offset + mi->seq[seq_num].len) {
+                        tmp_seq = tmp_seq << (4 * (8 - (contig_offset + mi->seq[seq_num].len) % 8));
+                        for (int j = 0; j < 8 - (contig_offset + mi->seq[seq_num].len) % 8; j++)
+                            tmp_seq = (tmp_seq >> 4) | 1073741824; // FIRST N
+                    }
+                }
+            }
+
+            for (int j = 0; j < 8; j++) {
+                uint32_t tmp = tmp_seq % 16;
+                switch (tmp) {
+                    case 0:
+                        original_ref_seq_ext[i * 8 + j] = 'A';
+                        break;
+                    case 1:
+                        original_ref_seq_ext[i * 8 + j] = 'C';
+                        break;
+                    case 2:
+                        original_ref_seq_ext[i * 8 + j] = 'G';
+                        break;
+                    case 3:
+                        original_ref_seq_ext[i * 8 + j] = 'T';
+                        break;
+                    case 4:
+                        original_ref_seq_ext[i * 8 + j] = 'N';
+                }
+                tmp_seq = tmp_seq / 16;
+            }
+        }
+
+        //Create new window
+        char * new_ref_seq = (char*)malloc(sizeof(char) * (SEQ_CHUNK_NUMBER * 8 + 1 - ref_len + 1 + EXT_CHUNK_COUNT * 8));
+        memcpy(new_ref_seq, original_ref_seq, EXTRA_GAP + SIDE_SIZE + (contig_offset + position - 1) % 8);
+        new_ref_seq[EXTRA_GAP + SIDE_SIZE + (contig_offset + position - 1) % 8] = ALT[0];
+        new_ref_seq[EXTRA_GAP + SIDE_SIZE + (contig_offset + position - 1) % 8 + 1] = '\0';
+        new_ref_seq = strcat(new_ref_seq, &original_ref_seq[EXTRA_GAP + SIDE_SIZE + (contig_offset + position - 1) % 8 + ref_len]);
+        new_ref_seq = strcat(new_ref_seq, original_ref_seq_ext);
+        
+        //Finds minimizer in window
+        mm128_v minimizer_array = {0, 0, 0};
+        mm_sketch(0, &new_ref_seq[(contig_offset + center_position - 1) % 8], SIDE_SIZE * 2 + 1, mi->w, mi->k,
+                    0, mi->flag & MM_I_HPC, &minimizer_array);
+
+
+        free(original_ref_seq_ext);
+        free(new_ref_seq);
+
+        for (int i = 0; i < minimizer_array.n; i++) {
+            if (minimizer_array.a[i].y < SIDE_SIZE * 2 + 2) continue; // TODO change 50 to 48 for similarity with SNPS (it will take extra calculation but no changes)
+            if (minimizer_array.a[i].y > (SIDE_SIZE + mi->k) * 2 - 1) continue;
+            minimizer_array.a[i].y = ((uint64_t)seq_num << 32) + (center_position - SIDE_SIZE - 1 + minimizer_array.a[i].y / 2) * 2 +
+                                        (minimizer_array.a[i].y % 2) + (ref_len - 1) * 2;
+            kv_push(mm128_t, 0, *p, minimizer_array.a[i]);
+        }
+    }
+}
+
+//Array format:
+//REF_arr char array - REF (for control)
+//ALT_arr char array - ALT
+//POS_all ulong array - positions
+//CHR - chromosome
+//N_SNP - length
+void add_variants(mm_idx_t * mi, const char * CHR, char ** REF_arr, char ** ALT_arr, unsigned long * POS_all, int N_SNP, unsigned long curr_pos, mm128_v *p)
+{
+    if (N_SNP == 0)
+        return;
+
+    const char *snp_contig_name = CHR;
+    const unsigned long snp_position = curr_pos;
+
+    //Find seq
+    uint64_t contig_offset;
+    int seq_num = -1;
+    for (int i = 0; i < mi->n_seq; i++) {
+        if (strcmp(snp_contig_name, mi->seq[i].name) == 0) {
+            contig_offset = mi->seq[i].offset;
+            seq_num = i;
+        }
+    }
+    //Error if no contigs in fasta
+    if(seq_num == -1) {
+        printf("ERROR Contig %s id not found in reference\n", snp_contig_name);
+        return;
+    }
+
+    int SIDE_SIZE = (mi->k - 1) + mi->w;
+    // Calculate number of chunks:
+    // side chunks: take k-mer size, subtract 1 and add window size
+    // divided by chunk size and multiplied by 2 as it has 2 sides, and one for center
+    int SEQ_CHUNK_NUMBER = SIDE_SIZE / 8 * 2 + 1;
+    // add extra two side chunks if (mi->k - 1 + 10) is not a multiple of 8
+    int EXTRA_GAP = (8 - SIDE_SIZE % 8) % 8;
+    SEQ_CHUNK_NUMBER = (EXTRA_GAP) ? SEQ_CHUNK_NUMBER + 2 : SEQ_CHUNK_NUMBER;
+    uint32_t seq[SEQ_CHUNK_NUMBER];
+
+    for (int i = 0; i < SEQ_CHUNK_NUMBER; i++) {
+        if (
+                // Out of bounds
+                (contig_offset == 0 && (snp_position - 1) / 8 - (SEQ_CHUNK_NUMBER / 2) + i < 0) ||
+                ((contig_offset + snp_position - 1) / 8 - (SEQ_CHUNK_NUMBER / 2) + i) * 8 >= contig_offset + mi->seq[seq_num].len ||
+                ((contig_offset + snp_position - 1) / 8 - (SEQ_CHUNK_NUMBER / 2) + i + 1) * 8 <= contig_offset
+                )
+            seq[i] = 1145324612; // ALL N
+        else {
+            seq[i] = mi->S[(contig_offset + snp_position - 1) / 8 - (SEQ_CHUNK_NUMBER / 2) + i];
+            // At left bound
+            if (((contig_offset + snp_position - 1) / 8 - (SEQ_CHUNK_NUMBER / 2) + i) * 8 < contig_offset &&
+                ((contig_offset + snp_position - 1) / 8 - (SEQ_CHUNK_NUMBER / 2) + i + 1) * 8 > contig_offset) {
+                if (contig_offset % 8 == 0)
+                    seq[i] = 1145324612; // ALL N
+                else {
+                    seq[i] = seq[i] >> (4 * (contig_offset % 8));
+                    for (int j = 0; j < contig_offset % 8; j++)
+                        seq[i] = (seq[i] << 4) + 4;
+                }
+            }
+            // At right bound
+            if (((contig_offset + snp_position - 1) / 8 - (SEQ_CHUNK_NUMBER / 2) + i) * 8 < contig_offset + mi->seq[seq_num].len &&
+                ((contig_offset + snp_position - 1) / 8 - (SEQ_CHUNK_NUMBER / 2) + i + 1) * 8 > contig_offset + mi->seq[seq_num].len) {
+                seq[i] = seq[i] << (4 * (8 - (contig_offset + mi->seq[seq_num].len) % 8));
+                for (int j = 0; j < 8 - (contig_offset + mi->seq[seq_num].len) % 8; j++)
+                    seq[i] = (seq[i] >> 4) | 1073741824; // FIRST N
+            }
+        }
+    }
+
+    char original_ref_seq[SEQ_CHUNK_NUMBER * 8 + 1];
+    original_ref_seq[SEQ_CHUNK_NUMBER * 8] = '\0';
+    for (int i = 0; i < SEQ_CHUNK_NUMBER; i++) {
+        uint32_t tmp_seq = seq[i];
+        for (int j = 0; j < 8; j++) {
+            uint32_t tmp = tmp_seq % 16;
+            switch (tmp) {
+                case 0:
+                    original_ref_seq[i * 8 + j] = 'A';
+                    break;
+                case 1:
+                    original_ref_seq[i * 8 + j] = 'C';
+                    break;
+                case 2:
+                    original_ref_seq[i * 8 + j] = 'G';
+                    break;
+                case 3:
+                    original_ref_seq[i * 8 + j] = 'T';
+                    break;
+                case 4:
+                    original_ref_seq[i * 8 + j] = 'N';
+            }
+            tmp_seq = tmp_seq / 16;
+        }
+    }
+
+
+    char new_ref_seq[SEQ_CHUNK_NUMBER * 8 + 1];
+    memcpy(new_ref_seq, original_ref_seq, SEQ_CHUNK_NUMBER * 8 + 1);
+
+    int has_indel = -1;
+    int indel_count = 0;
+    for (int i = N_SNP - 1; i >= 0; i--) {
+        //add single SNP
+        if ((strlen(REF_arr[i]) == 1) && (strlen(ALT_arr[i]) == 1)) {
+            new_ref_seq[(EXTRA_GAP + SIDE_SIZE + (contig_offset + snp_position - 1) % 8) +
+                        (POS_all[i] - snp_position)] = ALT_arr[i][0];// - 'A' + 'a';
+        } else {
+            has_indel = i;
+            indel_count++;
+        }
+    }
+    if ((indel_count == 1) && (POS_all[has_indel] == curr_pos)) {
+        if ((strlen(REF_arr[has_indel]) > mi->k) || (strlen(ALT_arr[has_indel]) > mi->k)) {
+            return;
+        }
+        return;
+        if ((strlen(REF_arr[has_indel]) > 1) && (strlen(ALT_arr[has_indel]) == 1)) {
+            add_indel(mi, CHR, REF_arr[has_indel], ALT_arr[has_indel], curr_pos,  POS_all[has_indel], p, new_ref_seq);
+            return;
+        }
+        if ((strlen(REF_arr[has_indel]) == 1) && (strlen(ALT_arr[has_indel]) > 1)) {
+            add_indel(mi, CHR, REF_arr[has_indel], ALT_arr[has_indel], curr_pos,  POS_all[has_indel], p, new_ref_seq);
+            return;
+        }
+    }
+    if (indel_count == N_SNP) {
+        return;
+    }
+
+    //Finds minimizer in window
+    mm128_v minimizer_array = {0, 0, 0};
+            mm_sketch(0, &new_ref_seq[EXTRA_GAP + (contig_offset + snp_position - 1) % 8], SIDE_SIZE * 2 + 1, mi->w, mi->k,
+                      0, mi->flag & MM_I_HPC, &minimizer_array);
+
+    for (int i = 0; i < minimizer_array.n; i++) {
+        if (minimizer_array.a[i].y < SIDE_SIZE * 2) continue;
+        if (minimizer_array.a[i].y > (SIDE_SIZE + mi->k) * 2 - 1) continue;
+        minimizer_array.a[i].y = ((uint64_t)seq_num << 32) + (snp_position - SIDE_SIZE - 1 + minimizer_array.a[i].y / 2) * 2 +
+                                    (minimizer_array.a[i].y % 2);
+
+        kv_push(mm128_t, 0, *p, minimizer_array.a[i]);
+    }
+}
diff --git a/test/README.md b/test/README.md
new file mode 100644
index 0000000..01ee755
--- /dev/null
+++ b/test/README.md
@@ -0,0 +1,32 @@
+## Tests for minimap2_index_modiifer
+There are three tests to check base functionality.
+
+### Common test
+This test create two index files: regular and modified.
+```bash
+minimap2 -d test/test.mni test/test.fasta
+minimap2 -d test/test.modified.mni --vcf-file-with-variants test/test_long_chr1_not_the_same.vcf.gz test/test.fasta
+diff test/test.mni test/test.modified.mni
+```
+
+Modified index file should contains extra string and some strings should be mismatched.
+
+### Empty VCF test
+If input VCF file contains no variants from reference modified index would be same as regular one.
+```bash
+minimap2 -d test/test.mni test/test.fasta
+minimap2 -d test/test.modified.mni --vcf-file-with-variants test/empty.vcf.gz test/test.fasta
+diff test/test.mni test/test.modified.mni
+```
+
+Index files should be the same (empty output after third command).
+
+### Pseudo-variants test
+In this test `test/test_long_chr1_the_same.vcf.gz` contains pseudo-variants (like A -> A, C -> C, etc). This variants would be processed with no effect.
+```bash
+minimap2 -d test/test.mni test/test.fasta
+minimap2 -d test/test.modified.mni --vcf-file-with-variants test/test_long_chr1_the_same.vcf.gz test/test.fasta
+diff test/test.mni test/test.modified.mni
+```
+
+Index files should be the same.
diff --git a/test/empty.vcf.gz b/test/empty.vcf.gz
new file mode 100644
index 0000000..2c6e5b9
Binary files /dev/null and b/test/empty.vcf.gz differ
diff --git a/test/empty.vcf.gz.tbi b/test/empty.vcf.gz.tbi
new file mode 100644
index 0000000..b9e0990
Binary files /dev/null and b/test/empty.vcf.gz.tbi differ
diff --git a/test/test.1.fq b/test/test.1.fq
new file mode 100644
index 0000000..09c5d87
--- /dev/null
+++ b/test/test.1.fq
@@ -0,0 +1,36 @@
+@111\1
+TTTTTTTTTTTTTTATTTTATTTTTTTCACACAAACACACACACACACAA
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@112\1
+TTTTTTTATTTTATTTTTTTTTCACACAAACACACACACACACAAAAAAA
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@113\1
+TTTTTTTTTTATTTTATTTTTTTTCACACAAACACACACACACACAAAAA
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@114\1
+AAAAAAAAAAAAAAAAAAAAAAAAATAAAAACGTACGACGTACGTTTTTT
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@115\1
+AAAAAAAAAAAAAAAAAAAAAAATAAAAACGTACGACGTACGTTTTTTTT
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@116\1
+AAAAAAAAAAATAAAAACGTACGACGTACGTTTTTTTTTTTTTTTATTTT
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@117\1
+ACACACAAAAAAATATATATATATATATATAAATATATTATATATATATA
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@118\1
+ATATATATATAAATATATATATATATATATATATATATATATATATATAT
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@119\1
+TATATATATATATATATATATATATATATATATAAATATATATATATAAA
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/test/test.2.fq b/test/test.2.fq
new file mode 100644
index 0000000..6874907
--- /dev/null
+++ b/test/test.2.fq
@@ -0,0 +1,36 @@
+@111\2
+AAAATAAAAACGTACGACGTACGTTTTTTTTTTTTTTTATTTTATTTTTT
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@112\2
+AAAAAAATAAAAACGTACGACGTACGTTTTTTTTTTTTTTTATTTTATTT
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@113\2
+TAAAAAAAAAAAAAATAAAAACGTACGACGTACGTTTTTTTTTTTTTTTA
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@114\2
+TTTTATTTTATTTTTTTGGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTT
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@115\2
+TTTTTTGGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTATATATGGGGG
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@116\2
+GGGGGGGGGTTTTTTTTTTTTTTATATATGGGGGGGGTTTTTTTAAAAAA
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@117\2
+ATATATATATATATAAATATATATATATAAAAAAAAAAAAAAAATAAAAA
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@118\2
+AATATATATATATATATATATATATATATATATATATATATAAATATATA
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@119\2
+GGGGGGGGGGGGGGGTTTTTTTTTTTTTTATATATGGGGGGGGTTTTTTT
++
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
\ No newline at end of file
diff --git a/test/test.fasta b/test/test.fasta
new file mode 100644
index 0000000..b9d552f
--- /dev/null
+++ b/test/test.fasta
@@ -0,0 +1,12 @@
+>chr1
+NNNNNNNNNNNTTTTTTTTTTTTTTTTTTTATTTTATTTTTTTTCACACAAACACACACACACACAAAAAAATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAAATATATAATATATAAAAAAAAAAAAAATAAAAACGTACGACGTACGTTTTTTTTTTTTTTTTTTTTATTTTTTTGGGGGGGGAGGGGGGGGGGTTTTTTTTTTTTTTATATATGGGGGGGGTTTTTTTAAAAAAAAATTTTTTTGGGGGGGGAGGGGGGGGGGTTTTTTTTTTTTTTNNNNNNNNNN
+>chr2
+NNNNNNNNNNNTTTTTTTTTTTTTTTTTTTATTTTATTTTTTTTCACACAAACACACACACACACAAAAA
+AATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAAATATATA
+ATATATAAAAAAAAAAAAAATAAAAACGTACGACGTACGTTTTTTTTTTTTTTTTTTTTATTTTTTTGGG
+GGGGGGGGGGGGGGGGTTTTTTTTTTTTTTATATATGGGGGGGGTTTTTTTAAAAAAAAANNNNNNNNNN
+>chr3
+NNNNAGTAGTCTTTTTTTTTTTTTTTTTTTATTTTATTTTTTTTCACACAAACACACACACACACAAAAA
+AATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAAATATATA
+ATATATAAAAAAAAAAAAAATAAAAACGTACGACGTACGTTTTTTTTTTTTTTTTTTTTATTTTTTTGGG
+GGGGGGGGGGGGGGGGTTTTTTTTTTTTTTATATATGGGGGGGGTTTTTTTAAAAAAAAAGGGGGGGGGG
diff --git a/test/test.vcf b/test/test.vcf
new file mode 100644
index 0000000..c009525
--- /dev/null
+++ b/test/test.vcf
@@ -0,0 +1,5 @@
+chr3	277	.	G	A	X
+chr3	6	.	G	C	X
+chr2	195	.	T	A	X
+chr2	146	.	T	A	X
+chr2	91	.	T	A	X
diff --git a/test/test_long_chr1.vcf.gz b/test/test_long_chr1.vcf.gz
new file mode 100644
index 0000000..59fd971
Binary files /dev/null and b/test/test_long_chr1.vcf.gz differ
diff --git a/test/test_long_chr1.vcf.gz.tbi b/test/test_long_chr1.vcf.gz.tbi
new file mode 100644
index 0000000..25b601d
Binary files /dev/null and b/test/test_long_chr1.vcf.gz.tbi differ
diff --git a/test/test_long_chr1_not_the_same.vcf.gz b/test/test_long_chr1_not_the_same.vcf.gz
new file mode 100644
index 0000000..20647e5
Binary files /dev/null and b/test/test_long_chr1_not_the_same.vcf.gz differ
diff --git a/test/test_long_chr1_not_the_same.vcf.gz.tbi b/test/test_long_chr1_not_the_same.vcf.gz.tbi
new file mode 100644
index 0000000..1cdb036
Binary files /dev/null and b/test/test_long_chr1_not_the_same.vcf.gz.tbi differ
diff --git a/test/test_long_chr1_the_same.vcf.gz b/test/test_long_chr1_the_same.vcf.gz
new file mode 100644
index 0000000..d92ccfe
Binary files /dev/null and b/test/test_long_chr1_the_same.vcf.gz differ
diff --git a/test/test_long_chr1_the_same.vcf.gz.tbi b/test/test_long_chr1_the_same.vcf.gz.tbi
new file mode 100644
index 0000000..3fe3431
Binary files /dev/null and b/test/test_long_chr1_the_same.vcf.gz.tbi differ