1515import numcodecs
1616import numpy as np
1717
18- from bio2zarr import schema
19-
20- from .. import constants , core , provenance , vcf_utils , writer
18+ from .. import constants , core , provenance , schema , vcf_utils , writer
2119
2220logger = logging .getLogger (__name__ )
2321
@@ -1029,28 +1027,33 @@ def iter_genotypes(self, shape, start, stop):
10291027 def generate_schema (
10301028 self , variants_chunk_size = None , samples_chunk_size = None , local_alleles = None
10311029 ):
1032- # Import schema here to avoid circular import
1033- from bio2zarr import schema
1034-
10351030 m = self .num_records
10361031 n = self .num_samples
1037- if samples_chunk_size is None :
1038- samples_chunk_size = 10_000
1039- if variants_chunk_size is None :
1040- variants_chunk_size = 1000
10411032 if local_alleles is None :
10421033 local_alleles = False
1034+
1035+ schema_instance = schema .VcfZarrSchema (
1036+ format_version = schema .ZARR_SCHEMA_FORMAT_VERSION ,
1037+ samples_chunk_size = samples_chunk_size ,
1038+ variants_chunk_size = variants_chunk_size ,
1039+ fields = [],
1040+ samples = self .metadata .samples ,
1041+ contigs = self .metadata .contigs ,
1042+ filters = self .metadata .filters ,
1043+ )
1044+
10431045 logger .info (
1044- f"Generating schema with chunks={ variants_chunk_size , samples_chunk_size } "
1046+ "Generating schema with chunks="
1047+ f"{ schema_instance .variants_chunk_size , schema_instance .samples_chunk_size } "
10451048 )
10461049
10471050 def spec_from_field (field , array_name = None ):
10481051 return schema .ZarrArraySpec .from_field (
10491052 field ,
10501053 num_samples = n ,
10511054 num_variants = m ,
1052- samples_chunk_size = samples_chunk_size ,
1053- variants_chunk_size = variants_chunk_size ,
1055+ samples_chunk_size = schema_instance . samples_chunk_size ,
1056+ variants_chunk_size = schema_instance . variants_chunk_size ,
10541057 array_name = array_name ,
10551058 )
10561059
@@ -1069,7 +1072,7 @@ def fixed_field_spec(
10691072 shape = shape ,
10701073 description = "" ,
10711074 dimensions = dimensions ,
1072- chunks = chunks or [variants_chunk_size ],
1075+ chunks = chunks or [schema_instance . variants_chunk_size ],
10731076 )
10741077
10751078 alt_field = self .fields ["ALT" ]
@@ -1085,14 +1088,14 @@ def fixed_field_spec(
10851088 dtype = "bool" ,
10861089 shape = (m , self .metadata .num_filters ),
10871090 dimensions = ["variants" , "filters" ],
1088- chunks = (variants_chunk_size , self .metadata .num_filters ),
1091+ chunks = (schema_instance . variants_chunk_size , self .metadata .num_filters ),
10891092 ),
10901093 fixed_field_spec (
10911094 name = "variant_allele" ,
10921095 dtype = "O" ,
10931096 shape = (m , max_alleles ),
10941097 dimensions = ["variants" , "alleles" ],
1095- chunks = (variants_chunk_size , max_alleles ),
1098+ chunks = (schema_instance . variants_chunk_size , max_alleles ),
10961099 ),
10971100 fixed_field_spec (
10981101 name = "variant_id" ,
@@ -1127,7 +1130,10 @@ def fixed_field_spec(
11271130 if gt_field is not None and n > 0 :
11281131 ploidy = max (gt_field .summary .max_number - 1 , 1 )
11291132 shape = [m , n ]
1130- chunks = [variants_chunk_size , samples_chunk_size ]
1133+ chunks = [
1134+ schema_instance .variants_chunk_size ,
1135+ schema_instance .samples_chunk_size ,
1136+ ]
11311137 dimensions = ["variants" , "samples" ]
11321138 array_specs .append (
11331139 schema .ZarrArraySpec .new (
@@ -1169,15 +1175,8 @@ def fixed_field_spec(
11691175 if local_alleles :
11701176 array_specs = convert_local_allele_field_types (array_specs )
11711177
1172- return schema .VcfZarrSchema (
1173- format_version = schema .ZARR_SCHEMA_FORMAT_VERSION ,
1174- samples_chunk_size = samples_chunk_size ,
1175- variants_chunk_size = variants_chunk_size ,
1176- fields = array_specs ,
1177- samples = self .metadata .samples ,
1178- contigs = self .metadata .contigs ,
1179- filters = self .metadata .filters ,
1180- )
1178+ schema_instance .fields = array_specs
1179+ return schema_instance
11811180
11821181
11831182@dataclasses .dataclass
0 commit comments