55import numpy as np
66import zarr
77
8- from bio2zarr import constants , vcz
8+ from bio2zarr import constants , core , vcz
99
1010logger = logging .getLogger (__name__ )
1111
@@ -18,6 +18,9 @@ def __init__(self, path):
1818 self .samples = [vcz .Sample (id = sample ) for sample in self .bed .iid ]
1919 self .num_samples = len (self .samples )
2020 self .root_attrs = {}
21+ self .contigs = [
22+ vcz .Contig (id = str (chrom )) for chrom in np .unique (self .bed .chromosome )
23+ ]
2124
2225 def iter_alleles (self , start , stop , num_alleles ):
2326 ref_field = self .bed .allele_1
@@ -32,6 +35,11 @@ def iter_alleles(self, start, stop, num_alleles):
3235 alleles [1 : 1 + len (alt )] = alt
3336 yield alleles
3437
def iter_contig(self, start, stop):
    """Yield the contig index for each variant in the slice ``start:stop``.

    Every chromosome label taken from ``self.bed.chromosome[start:stop]`` is
    converted with ``str`` and looked up among the ``id`` values of
    ``self.contigs``; the yielded value is the position of the matching
    entry in ``self.contigs``.

    Args:
        start: First variant offset (inclusive) into ``self.bed.chromosome``.
        stop: End variant offset (exclusive) into ``self.bed.chromosome``.

    Yields:
        The integer index into ``self.contigs`` for each variant, in order.

    Raises:
        KeyError: While iterating, if a chromosome label does not match the
            ``id`` of any entry in ``self.contigs``.
    """
    # Build the id -> index lookup once per call, not once per variant.
    contig_index_by_id = {
        contig.id: index for index, contig in enumerate(self.contigs)
    }
    for chrom in self.bed.chromosome[start:stop]:
        # Contig ids are compared as strings, so normalise the label first.
        yield contig_index_by_id[str(chrom)]
42+
3543 def iter_field (self , field_name , shape , start , stop ):
3644 assert field_name == "position" # Only position field is supported from plink
3745 yield from self .bed .bp_position [start :stop ]
@@ -88,6 +96,15 @@ def generate_schema(
8896 chunks = [schema_instance .variants_chunk_size , 2 ],
8997 description = None ,
9098 ),
99+ vcz .ZarrArraySpec .new (
100+ vcf_field = None ,
101+ name = "variant_contig" ,
102+ dtype = core .min_int_dtype (0 , len (np .unique (self .bed .chromosome ))),
103+ shape = [m ],
104+ dimensions = ["variants" ],
105+ chunks = [schema_instance .variants_chunk_size ],
106+ description = "Contig/chromosome index for each variant" ,
107+ ),
91108 vcz .ZarrArraySpec .new (
92109 vcf_field = None ,
93110 name = "call_genotype_phased" ,
@@ -159,9 +176,7 @@ def convert(
159176 show_progress = show_progress ,
160177 )
161178 vzw .finalise (show_progress )
162-
163- # TODO - index code needs variant_contig
164- # vzw.create_index()
179+ vzw .create_index ()
165180
166181
167182# FIXME do this more efficiently - currently reading the whole thing
0 commit comments