55import numpy as np
66import zarr
77
8- from bio2zarr import constants , schema , writer
8+ from bio2zarr import constants , vcz
99
1010logger = logging .getLogger (__name__ )
1111
@@ -58,12 +58,12 @@ def generate_schema(
5858 m = self .bed .sid_count
5959 logging .info (f"Scanned plink with { n } samples and { m } variants" )
6060
61- schema_instance = schema .VcfZarrSchema (
62- format_version = schema .ZARR_SCHEMA_FORMAT_VERSION ,
61+ schema_instance = vcz .VcfZarrSchema (
62+ format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
6363 samples_chunk_size = samples_chunk_size ,
6464 variants_chunk_size = variants_chunk_size ,
6565 fields = [],
66- samples = [schema .Sample (id = sample ) for sample in self .bed .iid ],
66+ samples = [vcz .Sample (id = sample ) for sample in self .bed .iid ],
6767 contigs = [],
6868 filters = [],
6969 )
@@ -74,7 +74,7 @@ def generate_schema(
7474 )
7575
7676 array_specs = [
77- schema .ZarrArraySpec .new (
77+ vcz .ZarrArraySpec .new (
7878 vcf_field = "position" ,
7979 name = "variant_position" ,
8080 dtype = "i4" ,
@@ -83,7 +83,7 @@ def generate_schema(
8383 chunks = [schema_instance .variants_chunk_size ],
8484 description = None ,
8585 ),
86- schema .ZarrArraySpec .new (
86+ vcz .ZarrArraySpec .new (
8787 vcf_field = None ,
8888 name = "variant_allele" ,
8989 dtype = "O" ,
@@ -92,7 +92,7 @@ def generate_schema(
9292 chunks = [schema_instance .variants_chunk_size , 2 ],
9393 description = None ,
9494 ),
95- schema .ZarrArraySpec .new (
95+ vcz .ZarrArraySpec .new (
9696 vcf_field = None ,
9797 name = "call_genotype_phased" ,
9898 dtype = "bool" ,
@@ -104,7 +104,7 @@ def generate_schema(
104104 ],
105105 description = None ,
106106 ),
107- schema .ZarrArraySpec .new (
107+ vcz .ZarrArraySpec .new (
108108 vcf_field = None ,
109109 name = "call_genotype" ,
110110 dtype = "i1" ,
@@ -117,7 +117,7 @@ def generate_schema(
117117 ],
118118 description = None ,
119119 ),
120- schema .ZarrArraySpec .new (
120+ vcz .ZarrArraySpec .new (
121121 vcf_field = None ,
122122 name = "call_genotype_mask" ,
123123 dtype = "bool" ,
@@ -150,7 +150,7 @@ def convert(
150150 samples_chunk_size = samples_chunk_size ,
151151 )
152152 zarr_path = pathlib .Path (zarr_path )
153- vzw = writer .VcfZarrWriter (PlinkFormat , zarr_path )
153+ vzw = vcz .VcfZarrWriter (PlinkFormat , zarr_path )
154154 # Rough heuristic to split work up enough to keep utilisation high
155155 target_num_partitions = max (1 , worker_processes * 4 )
156156 vzw .init (
0 commit comments