From 0345a1cdef626f6aa42d59f965e49010011abdb4 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Tue, 11 Mar 2025 11:45:14 -0400 Subject: [PATCH 01/16] Add dimension separator as a type parameter --- src/Storage/Storage.jl | 19 ++++++----- src/Storage/consolidated.jl | 7 +++-- src/Storage/dictstore.jl | 5 +-- src/Storage/directorystore.jl | 7 +++-- src/Storage/gcstore.jl | 7 +++-- src/Storage/http.jl | 5 +-- src/Storage/s3store.jl | 8 +++-- src/Storage/zipstore.jl | 8 ++--- src/ZArray.jl | 11 +++++-- src/metadata.jl | 59 ++++++++++++++++++++++++++++++----- 10 files changed, 99 insertions(+), 37 deletions(-) diff --git a/src/Storage/Storage.jl b/src/Storage/Storage.jl index 46c819a9..e23268c3 100644 --- a/src/Storage/Storage.jl +++ b/src/Storage/Storage.jl @@ -3,11 +3,13 @@ # and Dictionaries are supported """ - abstract type AbstractStore + abstract type AbstractStore{S} This the abstract supertype for all Zarr store implementations. Currently only regular files ([`DirectoryStore`](@ref)) and Dictionaries are supported. +S is the dimension separator + ## Interface All subtypes of `AbstractStore` must implement the following methods: @@ -24,7 +26,7 @@ They may optionally implement the following methods: - [`store_read_strategy(s::AbstractStore)`](@ref store_read_strategy): return the read strategy for the given store. See [`SequentialRead`](@ref) and [`ConcurrentRead`](@ref). """ -abstract type AbstractStore end +abstract type AbstractStore{S} end #Define the interface """ @@ -70,17 +72,18 @@ function subkeys end Deletes the given key from the store. """ -citostring(i::CartesianIndex) = join(reverse((i - oneunit(i)).I), '.') -citostring(::CartesianIndex{0}) = "0" +citostring(i::CartesianIndex, sep::Char='.') = join(reverse((i - oneunit(i)).I), sep) +citostring(::CartesianIndex{0}, _::Char) = "0" +citostring(i::CartesianIndex, s::AbstractStore{S}) where S = citostring(i, S) _concatpath(p,s) = isempty(p) ? 
s : rstrip(p,'/') * '/' * s -Base.getindex(s::AbstractStore, p, i::CartesianIndex) = s[p, citostring(i)] +Base.getindex(s::AbstractStore, p, i::CartesianIndex) = s[p, citostring(i, s)] Base.getindex(s::AbstractStore, p, i) = s[_concatpath(p,i)] -Base.delete!(s::AbstractStore, p, i::CartesianIndex) = delete!(s, p, citostring(i)) +Base.delete!(s::AbstractStore, p, i::CartesianIndex) = delete!(s, p, citostring(i, s)) Base.delete!(s::AbstractStore, p, i) = delete!(s, _concatpath(p,i)) Base.haskey(s::AbstractStore, k) = isinitialized(s,k) Base.setindex!(s::AbstractStore,v,p,i) = setindex!(s,v,_concatpath(p,i)) -Base.setindex!(s::AbstractStore,v,p,i::CartesianIndex) = s[p, citostring(i)]=v +Base.setindex!(s::AbstractStore,v,p,i::CartesianIndex) = s[p, citostring(i, s)]=v maybecopy(x) = copy(x) @@ -111,7 +114,7 @@ end is_zgroup(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zgroup")) is_zarray(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zarray")) -isinitialized(s::AbstractStore, p, i::CartesianIndex)=isinitialized(s,p,citostring(i)) +isinitialized(s::AbstractStore{S}, p, i::CartesianIndex) where S = isinitialized(s,p,citostring(i, S)) isinitialized(s::AbstractStore, p, i) = isinitialized(s,_concatpath(p,i)) isinitialized(s::AbstractStore, i) = s[i] !== nothing diff --git a/src/Storage/consolidated.jl b/src/Storage/consolidated.jl index 0b28f553..3ab49e8f 100644 --- a/src/Storage/consolidated.jl +++ b/src/Storage/consolidated.jl @@ -3,18 +3,19 @@ A store that wraps any other AbstractStore but has access to the consolidated me stored in the .zmetadata key. Whenever data attributes or metadata are accessed, the data will be read from the dictionary instead. 
""" -struct ConsolidatedStore{P} <: AbstractStore +struct ConsolidatedStore{S,P} <: AbstractStore{S} parent::P path::String cons::Dict{String,Any} end -function ConsolidatedStore(s::AbstractStore, p) +function ConsolidatedStore{S}(s::AbstractStore, p) where S d = s[p, ".zmetadata"] if d === nothing throw(ArgumentError("Could not find consolidated metadata for store $s")) end - ConsolidatedStore(s,p,JSON.parse(String(Zarr.maybecopy(d)))["metadata"]) + ConsolidatedStore{S, typeof(s)}(s,p,JSON.parse(String(Zarr.maybecopy(d)))["metadata"]) end +ConsolidatedStore(s::AbstractStore, p) = ConsolidateStore{'.'}(s, p) function Base.show(io::IO,d::ConsolidatedStore) b = IOBuffer() diff --git a/src/Storage/dictstore.jl b/src/Storage/dictstore.jl index 7815ed20..87f8af11 100644 --- a/src/Storage/dictstore.jl +++ b/src/Storage/dictstore.jl @@ -1,8 +1,9 @@ # Stores data in a simple dict in memory -struct DictStore <: AbstractStore +struct DictStore{S} <: AbstractStore{S} a::Dict{String,Vector{UInt8}} end -DictStore() = DictStore(Dict{String,Vector{UInt8}}()) +DictStore() = DictStore{'.'}(Dict{String,Vector{UInt8}}()) +DictStore{S}() where S = DictStore{S}(Dict{String,Vector{UInt8}}()) Base.show(io::IO,d::DictStore) = print(io,"Dictionary Storage") function _pdict(d::DictStore,p) diff --git a/src/Storage/directorystore.jl b/src/Storage/directorystore.jl index 6ded94fb..55b64e1e 100644 --- a/src/Storage/directorystore.jl +++ b/src/Storage/directorystore.jl @@ -9,12 +9,13 @@ function normalize_path(p::AbstractString) end # Stores files in a regular file system -struct DirectoryStore <: AbstractStore +struct DirectoryStore{S} <: AbstractStore{S} folder::String - function DirectoryStore(p) + function DirectoryStore{S}(p) where S mkpath(normalize_path(p)) - new(normalize_path(p)) + new{S}(normalize_path(p)) end + DirectoryStore(p) = DirectoryStore{'.'}(p) end function Base.getindex(d::DirectoryStore, i::String) diff --git a/src/Storage/gcstore.jl b/src/Storage/gcstore.jl index 
5f85820d..8e24cfe6 100644 --- a/src/Storage/gcstore.jl +++ b/src/Storage/gcstore.jl @@ -56,10 +56,10 @@ function _gcs_request_headers() return headers end -struct GCStore <: AbstractStore +struct GCStore{S} <: AbstractStore{S} bucket::String - function GCStore(url::String) + function GCStore{S}(url::String) where S uri = URI(url) if uri.scheme == "gs" @@ -71,6 +71,7 @@ struct GCStore <: AbstractStore @debug "GCS bucket: $bucket" new(bucket) end + GCStore(url::String) = GCStore{'.'}(url) end @@ -147,4 +148,4 @@ function storefromstring(::Type{<:GCStore}, url,_) return GCStore(url),p end -store_read_strategy(::GCStore) = ConcurrentRead(concurrent_io_tasks[]) \ No newline at end of file +store_read_strategy(::GCStore) = ConcurrentRead(concurrent_io_tasks[]) diff --git a/src/Storage/http.jl b/src/Storage/http.jl index 9b68cb14..223d4d4f 100644 --- a/src/Storage/http.jl +++ b/src/Storage/http.jl @@ -10,11 +10,12 @@ datasets being served through the [xpublish](https://xpublish.readthedocs.io/en/ python package. In case you experience performance issues, one can try to use `HTTP.set_default_connection_limit!` to increase the number of concurrent connections. 
""" -struct HTTPStore <: AbstractStore +struct HTTPStore{S} <: AbstractStore{S} url::String allowed_codes::Set{Int} + HTTPStore{S}(url, allowed_codes = Set((404,))) where S = new{S}(url, allowed_codes) end -HTTPStore(url) = HTTPStore(url,Set((404,))) +HTTPStore(url) = HTTPStore{'.'}(url) function Base.getindex(s::HTTPStore, k::String) r = HTTP.request("GET",string(s.url,"/",k),status_exception = false,socket_type_tls=OpenSSL.SSLStream) diff --git a/src/Storage/s3store.jl b/src/Storage/s3store.jl index aaab004f..cfc2e627 100644 --- a/src/Storage/s3store.jl +++ b/src/Storage/s3store.jl @@ -1,19 +1,21 @@ using AWSS3: AWSS3, s3_put, s3_get, s3_delete, s3_list_objects, s3_exists -struct S3Store <: AbstractStore +struct S3Store{S} <: AbstractStore{S} bucket::String aws::AWSS3.AWS.AbstractAWSConfig end -function S3Store(bucket::String; +function S3Store{S}(bucket::String; aws = nothing, - ) + ) where S if aws === nothing aws = AWSS3.AWS.global_aws_config() end S3Store(bucket, aws) end +S3Store(bucket, aws) = S3Store{'.'}(bucket, aws) +S3Store(bucket; aws = nothing) = S3Store{'.'}(bucket, aws) Base.show(io::IO,::S3Store) = print(io,"S3 Object Storage") diff --git a/src/Storage/zipstore.jl b/src/Storage/zipstore.jl index 8e8bbd27..9fd3ca25 100644 --- a/src/Storage/zipstore.jl +++ b/src/Storage/zipstore.jl @@ -5,12 +5,12 @@ import ZipArchives A read only store that wraps an `AbstractVector{UInt8}` that contains a zip file. 
""" -struct ZipStore{T <: AbstractVector{UInt8}} <: AbstractStore +struct ZipStore{S, T <: AbstractVector{UInt8}} <: AbstractStore{S} r::ZipArchives.ZipBufferReader{T} + ZipStore{S}(data::AbstractVector{UInt8}) where S = new{S, ZipArchives.ZipBufferReader}(ZipArchives.ZipBufferReader(data)) end - -ZipStore(data::AbstractVector{UInt8}) = ZipStore(ZipArchives.ZipBufferReader(data)) +ZipStore(data::AbstractVector{UInt8}) = ZipStore{'.'}(ZipArchives.ZipBufferReader(data)) Base.show(io::IO,::ZipStore) = print(io,"Read Only Zip Storage") @@ -94,4 +94,4 @@ function _writezip(w::ZipArchives.ZipWriter, s::AbstractStore, p::String) for subdir in subdirs(s, p) _writezip(w, s, _make_prefix(p)*subdir) end -end \ No newline at end of file +end diff --git a/src/ZArray.jl b/src/ZArray.jl index b0955687..de5a8ecd 100644 --- a/src/ZArray.jl +++ b/src/ZArray.jl @@ -311,6 +311,7 @@ Creates a new empty zarr array with element type `T` and array dimensions `dims` * `attrs=Dict()` a dict containing key-value pairs with metadata attributes associated to the array * `writeable=true` determines if the array is opened in read-only or write mode * `indent_json=false` determines if indents are added to format the json files `.zarray` and `.zattrs`. This makes them more readable, but increases file size. +* `dimension_separator='.'` sets how chunks are encoded. The Zarr v2 default is '.' such that the first 3D chunk would be `0.0.0`. The Zarr v3 default is `/`. """ function zcreate(::Type{T}, dims::Integer...; name="", @@ -335,14 +336,20 @@ function zcreate(::Type{T},storage::AbstractStore, filters = filterfromtype(T), attrs=Dict(), writeable=true, - indent_json=false + indent_json=false, + dimension_separator='.' 
) where T + + if dimension_separator isa AbstractString + # Convert AbstractString to Char + dimension_separator = only(dimension_separator) + end length(dims) == length(chunks) || throw(DimensionMismatch("Dims must have the same length as chunks")) N = length(dims) C = typeof(compressor) T2 = (fill_value === nothing || !fill_as_missing) ? T : Union{T,Missing} - metadata = Metadata{T2, N, C, typeof(filters)}( + metadata = Metadata{T2, N, C, typeof(filters), dimension_separator}( 2, dims, chunks, diff --git a/src/metadata.jl b/src/metadata.jl index d80e7c13..bfadfb03 100644 --- a/src/metadata.jl +++ b/src/metadata.jl @@ -91,9 +91,18 @@ Each array requires essential configuration metadata to be stored, enabling corr interpretation of the stored data. This metadata is encoded using JSON and stored as the value of the “.zarray” key within an array store. +# Type Parameters +* T - element type of the array +* N - dimensionality of the array +* C - compressor +* F - filters +* S - dimension separator + +# See Also + https://zarr.readthedocs.io/en/stable/spec/v2.html#metadata """ -struct Metadata{T, N, C, F} +struct Metadata{T, N, C, F, S} zarr_format::Int shape::Base.RefValue{NTuple{N, Int}} chunks::NTuple{N, Int} @@ -102,15 +111,46 @@ struct Metadata{T, N, C, F} fill_value::Union{T, Nothing} order::Char filters::F # not yet supported - function Metadata{T2, N, C, F}(zarr_format, shape, chunks, dtype, compressor,fill_value, order, filters) where {T2,N,C,F} + function Metadata{T2, N, C, F, S}(zarr_format, shape, chunks, dtype, compressor, fill_value, order, filters) where {T2,N,C,F,S} #We currently only support version zarr_format == 2 || throw(ArgumentError("Zarr.jl currently only support v2 of the protocol")) #Do some sanity checks to make sure we have a sane array any(<(0), shape) && throw(ArgumentError("Size must be positive")) any(<(1), chunks) && throw(ArgumentError("Chunk size must be >= 1 along each dimension")) order === 'C' || throw(ArgumentError("Currently only 
'C' storage order is supported")) - new{T2, N, C, F}(zarr_format, Base.RefValue{NTuple{N,Int}}(shape), chunks, dtype, compressor,fill_value, order, filters) + new{T2, N, C, F, S}(zarr_format, Base.RefValue{NTuple{N,Int}}(shape), chunks, dtype, compressor,fill_value, order, filters) end + function Metadata{T2, N, C, F}( + zarr_format, + shape, + chunks, + dtype, + compressor, + fill_value, + order, + filters, + dimension_separator::Char = '.' + ) where {T2,N,C,F} + return Metadata{T2, N, C, F, dimension_separator}( + zarr_format, + shape, + chunks, + dtype, + compressor, + fill_value, + order + ) + end + +end + +const DimensionSeparatedMetadata{S} = Metadata{<: Any, <: Any, <: Any, <: Any, S} + +function Base.getproperty(m::DimensionSeparatedMetadata{S}, name::Symbol) where S + if name == :dimension_separator + return S + end + return getfield(m, name) end #To make unit tests pass with ref shape @@ -123,7 +163,8 @@ function ==(m1::Metadata, m2::Metadata) m1.compressor == m2.compressor && m1.fill_value == m2.fill_value && m1.order == m2.order && - m1.filters == m2.filters + m1.filters == m2.filters && + m1.dimension_separator == m2.dimension_separator end @@ -135,9 +176,10 @@ function Metadata(A::AbstractArray{T, N}, chunks::NTuple{N, Int}; order::Char='C', filters::Nothing=nothing, fill_as_missing = false, + dimension_separator::Char = '.' ) where {T, N, C} T2 = (fill_value === nothing || !fill_as_missing) ? T : Union{T,Missing} - Metadata{T2, N, C, typeof(filters)}( + Metadata{T2, N, C, typeof(filters), dimension_separator}( zarr_format, size(A), chunks, @@ -175,7 +217,9 @@ function Metadata(d::AbstractDict, fill_as_missing) TU = (fv === nothing || !fill_as_missing) ? 
T : Union{T,Missing} - Metadata{TU, N, C, F}( + S = only(get(d, "dimension_separator", '.')) + + Metadata{TU, N, C, F, S}( d["zarr_format"], NTuple{N, Int}(d["shape"]) |> reverse, NTuple{N, Int}(d["chunks"]) |> reverse, @@ -197,7 +241,8 @@ function JSON.lower(md::Metadata) "compressor" => md.compressor, "fill_value" => fill_value_encoding(md.fill_value), "order" => md.order, - "filters" => md.filters + "filters" => md.filters, + "dimension_separator" => md.dimension_separator ) end From 61786e3661c9c3d017fe0748e72f3e651e0566d3 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 12 Mar 2025 17:24:42 -0400 Subject: [PATCH 02/16] Fix ZipStore constructor --- src/Storage/zipstore.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storage/zipstore.jl b/src/Storage/zipstore.jl index 9fd3ca25..f8a68f7a 100644 --- a/src/Storage/zipstore.jl +++ b/src/Storage/zipstore.jl @@ -8,9 +8,9 @@ A read only store that wraps an `AbstractVector{UInt8}` that contains a zip file struct ZipStore{S, T <: AbstractVector{UInt8}} <: AbstractStore{S} r::ZipArchives.ZipBufferReader{T} ZipStore{S}(data::AbstractVector{UInt8}) where S = new{S, ZipArchives.ZipBufferReader}(ZipArchives.ZipBufferReader(data)) + ZipStore(data::AbstractVector{UInt8}) = ZipStore{'.'}(data) end -ZipStore(data::AbstractVector{UInt8}) = ZipStore{'.'}(ZipArchives.ZipBufferReader(data)) Base.show(io::IO,::ZipStore) = print(io,"Read Only Zip Storage") From cbb23cebaed913929e8a343b8fbabf64d6ca02c1 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 12 Mar 2025 17:25:48 -0400 Subject: [PATCH 03/16] Fix ConsolidatedStore --- src/Storage/consolidated.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storage/consolidated.jl b/src/Storage/consolidated.jl index 3ab49e8f..fa004033 100644 --- a/src/Storage/consolidated.jl +++ b/src/Storage/consolidated.jl @@ -15,7 +15,7 @@ function ConsolidatedStore{S}(s::AbstractStore, p) where S end ConsolidatedStore{S, 
typeof(s)}(s,p,JSON.parse(String(Zarr.maybecopy(d)))["metadata"]) end -ConsolidatedStore(s::AbstractStore, p) = ConsolidateStore{'.'}(s, p) +ConsolidatedStore(s::AbstractStore, p) = ConsolidatedStore{'.'}(s, p) function Base.show(io::IO,d::ConsolidatedStore) b = IOBuffer() From e4630a9899914d8ad5353f4301333edc026cf69f Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 12 Mar 2025 17:27:38 -0400 Subject: [PATCH 04/16] Fix S3Store constructor --- src/Storage/s3store.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storage/s3store.jl b/src/Storage/s3store.jl index cfc2e627..f6afd1c2 100644 --- a/src/Storage/s3store.jl +++ b/src/Storage/s3store.jl @@ -15,7 +15,7 @@ function S3Store{S}(bucket::String; S3Store(bucket, aws) end S3Store(bucket, aws) = S3Store{'.'}(bucket, aws) -S3Store(bucket; aws = nothing) = S3Store{'.'}(bucket, aws) +S3Store(bucket; aws = nothing) = S3Store{'.'}(bucket; aws) Base.show(io::IO,::S3Store) = print(io,"S3 Object Storage") From b9e175fb5915b922d622ae787bf7167ee81d1829 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Thu, 13 Mar 2025 03:08:39 -0400 Subject: [PATCH 05/16] Add version as a type parameter --- src/Storage/Storage.jl | 27 ++++++++++++++++++----- src/Storage/consolidated.jl | 13 +++++++---- src/Storage/dictstore.jl | 7 +++--- src/Storage/directorystore.jl | 9 ++++---- src/Storage/gcstore.jl | 7 +++--- src/Storage/http.jl | 7 +++--- src/Storage/s3store.jl | 14 +++++++----- src/Storage/zipstore.jl | 7 +++--- test/runtests.jl | 12 +++++----- test/storage.jl | 41 ++++++++++++++++++++++++++++------- 10 files changed, 99 insertions(+), 45 deletions(-) diff --git a/src/Storage/Storage.jl b/src/Storage/Storage.jl index e23268c3..9a056b57 100644 --- a/src/Storage/Storage.jl +++ b/src/Storage/Storage.jl @@ -2,12 +2,27 @@ # Defines different storages for zarr arrays. 
Currently only regular files (DirectoryStore) # and Dictionaries are supported +# Default Zarr version +const DV = 2 + +# Default Zarr separator + +# Default Zarr v2 separator +const DS2 = '.' +# Default Zarr v3 separator +const DS3 = '/' + +default_sep(version) = version == 2 ? DS2 : DS3 +const DS = default_sep(DV) + """ - abstract type AbstractStore{S} + abstract type AbstractStore{V,S} This the abstract supertype for all Zarr store implementations. Currently only regular files ([`DirectoryStore`](@ref)) and Dictionaries are supported. +# Type Parameters +V is the version, either 2 or 3 S is the dimension separator ## Interface @@ -26,7 +41,7 @@ They may optionally implement the following methods: - [`store_read_strategy(s::AbstractStore)`](@ref store_read_strategy): return the read strategy for the given store. See [`SequentialRead`](@ref) and [`ConcurrentRead`](@ref). """ -abstract type AbstractStore{S} end +abstract type AbstractStore{V,S} end #Define the interface """ @@ -72,9 +87,9 @@ function subkeys end Deletes the given key from the store. """ -citostring(i::CartesianIndex, sep::Char='.') = join(reverse((i - oneunit(i)).I), sep) -citostring(::CartesianIndex{0}, _::Char) = "0" -citostring(i::CartesianIndex, s::AbstractStore{S}) where S = citostring(i, S) +@inline citostring(i::CartesianIndex, version::Int=DV, sep::Char=default_sep(version)) = (version == 3 ? "c$sep" : "" ) * join(reverse((i - oneunit(i)).I), sep) +@inline citostring(::CartesianIndex{0}, version::Int=DV, sep::Char=default_sep(version)) = (version == 3 ? "c$(sep)0" : "0" ) +citostring(i::CartesianIndex, s::AbstractStore{V, S}) where {V,S} = citostring(i, V, S) _concatpath(p,s) = isempty(p) ? 
s : rstrip(p,'/') * '/' * s Base.getindex(s::AbstractStore, p, i::CartesianIndex) = s[p, citostring(i, s)] @@ -114,7 +129,7 @@ end is_zgroup(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zgroup")) is_zarray(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zarray")) -isinitialized(s::AbstractStore{S}, p, i::CartesianIndex) where S = isinitialized(s,p,citostring(i, S)) +isinitialized(s::AbstractStore, p, i::CartesianIndex) = isinitialized(s,p,citostring(i, s)) isinitialized(s::AbstractStore, p, i) = isinitialized(s,_concatpath(p,i)) isinitialized(s::AbstractStore, i) = s[i] !== nothing diff --git a/src/Storage/consolidated.jl b/src/Storage/consolidated.jl index fa004033..2429622c 100644 --- a/src/Storage/consolidated.jl +++ b/src/Storage/consolidated.jl @@ -3,19 +3,24 @@ A store that wraps any other AbstractStore but has access to the consolidated me stored in the .zmetadata key. Whenever data attributes or metadata are accessed, the data will be read from the dictionary instead. 
""" -struct ConsolidatedStore{S,P} <: AbstractStore{S} +struct ConsolidatedStore{V,S,P} <: AbstractStore{V,S} parent::P path::String cons::Dict{String,Any} end -function ConsolidatedStore{S}(s::AbstractStore, p) where S +function ConsolidatedStore{V,S}(s::AbstractStore, p) where {V,S} d = s[p, ".zmetadata"] if d === nothing throw(ArgumentError("Could not find consolidated metadata for store $s")) end - ConsolidatedStore{S, typeof(s)}(s,p,JSON.parse(String(Zarr.maybecopy(d)))["metadata"]) + ConsolidatedStore{V, S, typeof(s)}(s,p,JSON.parse(String(Zarr.maybecopy(d)))["metadata"]) end -ConsolidatedStore(s::AbstractStore, p) = ConsolidatedStore{'.'}(s, p) +ConsolidatedStore{V}(s::AbstractStore, p) where V = ConsolidatedStore{V, default_sep(V)}(s, p) +ConsolidatedStore(s::AbstractStore, p) = ConsolidatedStore{DV,DS}(s, p) + +ConsolidatedStore(s::AbstractStore, p, d) = ConsolidatedStore{DV, DS}(s,p,d) +ConsolidatedStore{V}(s::AbstractStore, p, d) where V = ConsolidatedStore{V, default_sep(V)}(s,p,d) +ConsolidatedStore{V,S}(s::AbstractStore, p, d) where {V,S} = ConsolidatedStore{V, default_sep(V), typeof(s)}(s,p,d) function Base.show(io::IO,d::ConsolidatedStore) b = IOBuffer() diff --git a/src/Storage/dictstore.jl b/src/Storage/dictstore.jl index 87f8af11..f6598971 100644 --- a/src/Storage/dictstore.jl +++ b/src/Storage/dictstore.jl @@ -1,9 +1,10 @@ # Stores data in a simple dict in memory -struct DictStore{S} <: AbstractStore{S} +struct DictStore{V,S} <: AbstractStore{V,S} a::Dict{String,Vector{UInt8}} + DictStore{V,S}(a=Dict{String,Vector{UInt8}}()) where {V,S} = new{V,S}(a) end -DictStore() = DictStore{'.'}(Dict{String,Vector{UInt8}}()) -DictStore{S}() where S = DictStore{S}(Dict{String,Vector{UInt8}}()) +DictStore(a=Dict{String,Vector{UInt8}}()) = DictStore{DV,DS}(a) +DictStore{V}(a=Dict{String,Vector{UInt8}}()) where V = DictStore{V, default_sep(V)}(a) Base.show(io::IO,d::DictStore) = print(io,"Dictionary Storage") function _pdict(d::DictStore,p) diff --git 
a/src/Storage/directorystore.jl b/src/Storage/directorystore.jl index 55b64e1e..0b7f6434 100644 --- a/src/Storage/directorystore.jl +++ b/src/Storage/directorystore.jl @@ -9,13 +9,14 @@ function normalize_path(p::AbstractString) end # Stores files in a regular file system -struct DirectoryStore{S} <: AbstractStore{S} +struct DirectoryStore{V,S} <: AbstractStore{V,S} folder::String - function DirectoryStore{S}(p) where S + function DirectoryStore{V,S}(p) where {V,S} mkpath(normalize_path(p)) - new{S}(normalize_path(p)) + new{V,S}(normalize_path(p)) end - DirectoryStore(p) = DirectoryStore{'.'}(p) + DirectoryStore(p) = DirectoryStore{DV,DS}(p) + DirectoryStore{V}(p) where V = DirectoryStore{V, default_sep(V)}(p) end function Base.getindex(d::DirectoryStore, i::String) diff --git a/src/Storage/gcstore.jl b/src/Storage/gcstore.jl index 8e24cfe6..84dd6f6f 100644 --- a/src/Storage/gcstore.jl +++ b/src/Storage/gcstore.jl @@ -56,10 +56,10 @@ function _gcs_request_headers() return headers end -struct GCStore{S} <: AbstractStore{S} +struct GCStore{V,S} <: AbstractStore{V,S} bucket::String - function GCStore{S}(url::String) where S + function GCStore{V,S}(url::String) where {V,S} uri = URI(url) if uri.scheme == "gs" @@ -71,7 +71,8 @@ struct GCStore{S} <: AbstractStore{S} @debug "GCS bucket: $bucket" new(bucket) end - GCStore(url::String) = GCStore{'.'}(url) + GCStore(url::String) = GCStore{DV,DS}(url) + GCStore{V}(url::String) where V = GCStore{V, default_sep(V)}(url) end diff --git a/src/Storage/http.jl b/src/Storage/http.jl index 223d4d4f..f335e0f7 100644 --- a/src/Storage/http.jl +++ b/src/Storage/http.jl @@ -10,12 +10,13 @@ datasets being served through the [xpublish](https://xpublish.readthedocs.io/en/ python package. In case you experience performance issues, one can try to use `HTTP.set_default_connection_limit!` to increase the number of concurrent connections. 
""" -struct HTTPStore{S} <: AbstractStore{S} +struct HTTPStore{V,S} <: AbstractStore{V,S} url::String allowed_codes::Set{Int} - HTTPStore{S}(url, allowed_codes = Set((404,))) where S = new{S}(url, allowed_codes) + HTTPStore{V,S}(url, allowed_codes = Set((404,))) where {V,S} = new{V,S}(url, allowed_codes) end -HTTPStore(url) = HTTPStore{'.'}(url) +HTTPStore(url) = HTTPStore{DV, DS}(url) +HTTPStore{V}(url) where V = HTTPStore{V, default_sep(V)}(url) function Base.getindex(s::HTTPStore, k::String) r = HTTP.request("GET",string(s.url,"/",k),status_exception = false,socket_type_tls=OpenSSL.SSLStream) diff --git a/src/Storage/s3store.jl b/src/Storage/s3store.jl index f6afd1c2..f3d0b1fa 100644 --- a/src/Storage/s3store.jl +++ b/src/Storage/s3store.jl @@ -1,21 +1,23 @@ using AWSS3: AWSS3, s3_put, s3_get, s3_delete, s3_list_objects, s3_exists -struct S3Store{S} <: AbstractStore{S} +struct S3Store{V,S} <: AbstractStore{V,S} bucket::String aws::AWSS3.AWS.AbstractAWSConfig end -function S3Store{S}(bucket::String; +function S3Store{V,S}(bucket::String; aws = nothing, - ) where S + ) where {V,S} if aws === nothing aws = AWSS3.AWS.global_aws_config() end - S3Store(bucket, aws) + S3Store{V,S}(bucket, aws) end -S3Store(bucket, aws) = S3Store{'.'}(bucket, aws) -S3Store(bucket; aws = nothing) = S3Store{'.'}(bucket; aws) +S3Store(bucket, aws) = S3Store{DV,DS}(bucket, aws) +S3Store{V}(bucket, aws) where V = S3Store{V, default_sep(V)}(bucket, aws) +S3Store(bucket; aws = nothing) = S3Store{DV, DS}(bucket; aws) +S3Store{V}(bucket; aws = nothing) where V = S3Store{V, default_sep(V)}(bucket; aws) Base.show(io::IO,::S3Store) = print(io,"S3 Object Storage") diff --git a/src/Storage/zipstore.jl b/src/Storage/zipstore.jl index f8a68f7a..8fb5aca0 100644 --- a/src/Storage/zipstore.jl +++ b/src/Storage/zipstore.jl @@ -5,10 +5,11 @@ import ZipArchives A read only store that wraps an `AbstractVector{UInt8}` that contains a zip file. 
""" -struct ZipStore{S, T <: AbstractVector{UInt8}} <: AbstractStore{S} +struct ZipStore{V, S, T <: AbstractVector{UInt8}} <: AbstractStore{V, S} r::ZipArchives.ZipBufferReader{T} - ZipStore{S}(data::AbstractVector{UInt8}) where S = new{S, ZipArchives.ZipBufferReader}(ZipArchives.ZipBufferReader(data)) - ZipStore(data::AbstractVector{UInt8}) = ZipStore{'.'}(data) + ZipStore{V,S}(data::T) where {V,S,T} = new{V, S, T}(ZipArchives.ZipBufferReader(data)) + ZipStore{V}(data::AbstractVector{UInt8}) where V = ZipStore{V, default_sep(V)}(data) + ZipStore(data::AbstractVector{UInt8}) = ZipStore{DV,DS}(data) end diff --git a/test/runtests.jl b/test/runtests.jl index c472eb1f..34790c9b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,7 +15,7 @@ CondaPkg.add("zarr"; version="2.*") @testset "fields" begin z = zzeros(Int64, 2, 3) @test z isa ZArray{Int64, 2, Zarr.BloscCompressor, - Zarr.DictStore} + Zarr.DictStore{2, '.'}} @test length(z.storage.a) === 3 @test length(z.storage.a["0.0"]) === 64 @@ -40,7 +40,7 @@ CondaPkg.add("zarr"; version="2.*") @testset "methods" begin z = zzeros(Int64, 2, 3) @test z isa ZArray{Int64, 2, Zarr.BloscCompressor, - Zarr.DictStore} + Zarr.DictStore{2, '.'}} @test eltype(z) === Int64 @test ndims(z) === 2 @@ -69,12 +69,14 @@ CondaPkg.add("zarr"; version="2.*") @test JSON.parsefile("$dir/$name/.zarray") == Dict{String, Any}( "dtype" => " nothing, - "shape" => [3, 2], + "shape" => Any[3, 2], "order" => "C", "zarr_format" => 2, - "chunks" => [3, 2], + "chunks" => Any[3, 2], "fill_value" => nothing, - "compressor" => nothing) + "compressor" => nothing, + "dimension_separator" => "." 
+ ) # call gc to avoid unlink: operation not permitted (EPERM) on Windows # might be because files are left open # from https://github.com/JuliaLang/julia/blob/f6344d32d3ebb307e2b54a77e042559f42d2ebf6/stdlib/SharedArrays/test/runtests.jl#L146 diff --git a/test/storage.jl b/test/storage.jl index 9e4fac73..1a73ad10 100644 --- a/test/storage.jl +++ b/test/storage.jl @@ -8,10 +8,31 @@ @test Zarr.normalize_path("/path/to/a") == "/path/to/a" end +@testset "Version and Dimension Separator" begin + let ci = CartesianIndex() + @test Zarr.citostring(ci, 2, '.') == "0" + @test Zarr.citostring(ci, 2, '/') == "0" + @test Zarr.citostring(ci, 3, '.') == "c.0" + @test Zarr.citostring(ci, 3, '/') == "c/0" + end + let ci = CartesianIndex(1,1,1) + @test Zarr.citostring(ci, 2, '.') == "0.0.0" + @test Zarr.citostring(ci, 2, '/') == "0/0/0" + @test Zarr.citostring(ci, 3, '.') == "c.0.0.0" + @test Zarr.citostring(ci, 3, '/') == "c/0/0/0" + end + let ci = CartesianIndex(1,3,5) + @test Zarr.citostring(ci, 2, '.') == "4.2.0" + @test Zarr.citostring(ci, 2, '/') == "4/2/0" + @test Zarr.citostring(ci, 3, '.') == "c.4.2.0" + @test Zarr.citostring(ci, 3, '/') == "c/4/2/0" + end +end + """ Function to test the interface of AbstractStore. Every complete implementation should pass this test. 
""" -function test_store_common(ds) +function test_store_common(ds::Zarr.AbstractStore{V,S}) where {V,S} @test !Zarr.is_zgroup(ds,"") ds[".zgroup"]=rand(UInt8,50) @test haskey(ds,".zgroup") @@ -31,17 +52,21 @@ function test_store_common(ds) @test Zarr.subdirs(ds,"bar") == String[] #Test getindex and setindex data = rand(UInt8,50) - ds["bar/0.0.0"] = data + first_ci_str = Zarr.citostring(CartesianIndex(1,1,1), V, S) + second_ci_str = Zarr.citostring(CartesianIndex(2,1,1), V, S) + ds["bar/" * first_ci_str] = data @test ds["bar/0.0.0"]==data @test Zarr.storagesize(ds,"bar")==50 - @test Zarr.isinitialized(ds,"bar/0.0.0") - @test !Zarr.isinitialized(ds,"bar/0.0.1") + @test Zarr.isinitialized(ds,"bar/" * first_ci_str) + @test !Zarr.isinitialized(ds,"bar/" * second_ci_str) Zarr.writeattrs(ds,"bar",Dict("a"=>"b")) @test Zarr.getattrs(ds,"bar")==Dict("a"=>"b") - delete!(ds,"bar/0.0.0") - @test !Zarr.isinitialized(ds,"bar",CartesianIndex((0,0,0))) - @test !Zarr.isinitialized(ds,"bar/0.0.0") - ds["bar/0.0.0"] = data + delete!(ds,"bar/" * first_ci_str) + @test !Zarr.isinitialized(ds,"bar",CartesianIndex((1,1,1))) + @test !Zarr.isinitialized(ds,"bar/" * first_ci_str) + ds["bar/" * first_ci_str] = data + @test !Zarr.isinitialized(ds, "bar", CartesianIndex(0,0,0)) + @test Zarr.isinitialized(ds, "bar", CartesianIndex(1,1,1)) #Add tests for empty storage @test Zarr.isemptysub(ds,"ba") @test Zarr.isemptysub(ds,"ba/") From 362437677d10d09e73e11f7b4f5248159b149d8c Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 17 Mar 2025 23:21:34 -0400 Subject: [PATCH 06/16] Check metadata for dimension_separator and zarr_format --- src/Storage/Storage.jl | 61 ++++++++++++++++++++++++++++++++++++++++++ src/Storage/gcstore.jl | 1 + src/Storage/http.jl | 13 ++++++++- src/Storage/s3store.jl | 1 + src/ZArray.jl | 27 ++++++++++++------- src/ZGroup.jl | 28 ++++++++++++++----- 6 files changed, 115 insertions(+), 16 deletions(-) diff --git a/src/Storage/Storage.jl b/src/Storage/Storage.jl index 
9a056b57..de40d32e 100644 --- a/src/Storage/Storage.jl +++ b/src/Storage/Storage.jl @@ -215,6 +215,19 @@ isemptysub(s::AbstractStore, p) = isempty(subkeys(s,p)) && isempty(subdirs(s,p)) #during auto-check of storage format when doing zopen storageregexlist = Pair[] +function Base.getproperty(store::AbstractStore{V,S}, sym::Symbol) where {V,S} + if sym == :dimension_separator + return S + elseif sym == :zarr_format + return V + else + return getfield(store, sym) + end +end +function Base.propertynames(store::AbstractStore) + return (:dimension_separator, :version, getfields(store)...) +end + include("directorystore.jl") include("dictstore.jl") include("s3store.jl") @@ -222,3 +235,51 @@ include("gcstore.jl") include("consolidated.jl") include("http.jl") include("zipstore.jl") + +# Itemize subtypes of AbstractStore for code generation below +const KnownAbstractStores = (DirectoryStore, GCStore, S3Store, ConsolidatedStore, DictStore, HTTPStore, ZipStore) + +""" + Zarr.set_dimension_separator(::AbstractStore{V}, sep::Char)::AbstractStore{V,sep} + +Returns an AbstractStore of the same type with the same `zarr_format` parameter, `V`, +but with a dimension separator of `sep`. + +# Examples + +``` +julia> Zarr.set_dimension_separator(Zarr.DictStore{2, '.'}(), '/') |> typeof +Zarr.DictStore{2, '/'} +``` + +""" +set_dimension_separator + +""" + set_zarr_format(::AbstractStore{<: Any, S}, zarr_format::Int)::AbstractStore{zarr_format,S} + +Returns an AbstractStore of the same type with the same `dimension_separator` parameter, `S`, +but with the specified `zarr_format` parameter. + +# Examples + +``` +julia> Zarr.set_zarr_format(Zarr.DictStore{2, '.'}(), 3) |> typeof +Zarr.DictStore{3, '.'} +``` + +""" +set_zarr_format + +for T in KnownAbstractStores + e = quote + # copy constructor to change zarr_format and dimension_separator parameters + (::Type{$T{V,S}})(store::$T) where {V,S} = + $T{V,S}(ntuple(i->getfield(store, i), nfields(store))...) 
+ set_dimension_separator(store::$T{V}, sep::Char) where V = + $T{V,sep}(ntuple(i->getfield(store, i), nfields(store))...) + set_zarr_format(store::$T{<: Any, S}, zarr_format::Int) where S = + $T{zarr_format,S}(ntuple(i->getfield(store, i), nfields(store))...) + end + eval(e) +end diff --git a/src/Storage/gcstore.jl b/src/Storage/gcstore.jl index 84dd6f6f..5f0860a2 100644 --- a/src/Storage/gcstore.jl +++ b/src/Storage/gcstore.jl @@ -137,6 +137,7 @@ pushfirst!(storageregexlist,r"^http://storage.googleapis.com"=>GCStore) push!(storageregexlist,r"^gs://"=>GCStore) function storefromstring(::Type{<:GCStore}, url,_) + # TODO: Check metadata for version and dimension separator uri = URI(url) if uri.scheme == "gs" p = lstrip(uri.path,'/') diff --git a/src/Storage/http.jl b/src/Storage/http.jl index f335e0f7..e213642a 100644 --- a/src/Storage/http.jl +++ b/src/Storage/http.jl @@ -41,7 +41,18 @@ end push!(storageregexlist,r"^https://"=>HTTPStore) push!(storageregexlist,r"^http://"=>HTTPStore) -storefromstring(::Type{<:HTTPStore}, s,_) = ConsolidatedStore(HTTPStore(s),""),"" +function storefromstring(::Type{<:HTTPStore}, s,_) + http_store = HTTPStore(s) + if is_zarray(http_store, "") + meta = getmetadata(http_store, "", false) + http_store = HTTPStore{meta.zarr_format, meta.dimension_separator}(s) + end + if http_store["", ".zmetadata"] !== nothing + return ConsolidatedStore(http_store,""),"" + else + return http_store,"" + end +end """ missing_chunk_return_code!(s::HTTPStore, code::Union{Int,AbstractVector{Int}}) diff --git a/src/Storage/s3store.jl b/src/Storage/s3store.jl index f3d0b1fa..03681fa3 100644 --- a/src/Storage/s3store.jl +++ b/src/Storage/s3store.jl @@ -78,6 +78,7 @@ allstrings(v,prefixkey) = [rstrip(String(v[prefixkey]),'/')] push!(storageregexlist,r"^s3://"=>S3Store) function storefromstring(::Type{<:S3Store}, s, _) + # TODO: Check metadata for version and dimension separator decomp = split(s,"/",keepempty=false) bucket = decomp[2] path = 
join(decomp[3:end],"/") diff --git a/src/ZArray.jl b/src/ZArray.jl index de5a8ecd..0aa472b4 100644 --- a/src/ZArray.jl +++ b/src/ZArray.jl @@ -316,17 +316,24 @@ Creates a new empty zarr array with element type `T` and array dimensions `dims` function zcreate(::Type{T}, dims::Integer...; name="", path=nothing, + dimension_separator='.', kwargs... ) where T + + if dimension_separator isa AbstractString + # Convert AbstractString to Char + dimension_separator = only(dimension_separator) + end + if path===nothing - store = DictStore() + store = DictStore{DV, dimension_separator}() else - store = DirectoryStore(joinpath(path,name)) + store = DirectoryStore{DV, dimension_separator}(joinpath(path,name)) end zcreate(T, store, dims...; kwargs...) end -function zcreate(::Type{T},storage::AbstractStore, +function zcreate(::Type{T},storage::AbstractStore{<: Any,S}, dims...; path = "", chunks=dims, @@ -337,12 +344,14 @@ function zcreate(::Type{T},storage::AbstractStore, attrs=Dict(), writeable=true, indent_json=false, - dimension_separator='.' 
- ) where T - - if dimension_separator isa AbstractString - # Convert AbstractString to Char - dimension_separator = only(dimension_separator) + dimension_separator=nothing + ) where {T,S} + + if isnothing(dimension_separator) + dimension_separator = S + elseif dimension_separator != S + error("The dimension separator keyword value, $dimension_separator, + must agree with the dimension separator type parameter, $S") end length(dims) == length(chunks) || throw(DimensionMismatch("Dims must have the same length as chunks")) diff --git a/src/ZGroup.jl b/src/ZGroup.jl index 35515ed1..d0752ec0 100644 --- a/src/ZGroup.jl +++ b/src/ZGroup.jl @@ -20,10 +20,16 @@ function ZGroup(s::T,mode="r",path="";fill_as_missing=false) where T <: Abstract for d in subdirs(s,path) dshort = split(d,'/')[end] - m = zopen_noerr(s,mode,path=_concatpath(path,dshort),fill_as_missing=fill_as_missing) - if isa(m, ZArray) + subpath = _concatpath(path,dshort) + if is_zarray(s, subpath) + meta = getmetadata(s, subpath, false) + if s.dimension_separator != meta.dimension_separator + s = set_dimension_separator(s, meta.dimension_separator) + end + m = zopen_noerr(s,mode,path=_concatpath(path,dshort),fill_as_missing=fill_as_missing) arrays[dshort] = m - elseif isa(m, ZGroup) + elseif is_zgroup(s, subpath) + m = zopen_noerr(s,mode,path=_concatpath(path,dshort),fill_as_missing=fill_as_missing) groups[dshort] = m end end @@ -39,7 +45,7 @@ Works like `zopen` with the single difference that no error is thrown when the path or store does not point to a valid zarr array or group, but nothing is returned instead. 
""" -function zopen_noerr(s::AbstractStore, mode="r"; +function zopen_noerr(s::AbstractStore, mode="r"; consolidated = false, path="", lru = 0, @@ -116,8 +122,18 @@ function storefromstring(s, create=true) return storefromstring(t,s,create) end end - if create || isdir(s) - return DirectoryStore(s), "" + if create + return DirectoryStore(s), "" + elseif isdir(s) + # parse metadata to determine store kind + temp_store = DirectoryStore(s) + if is_zarray(temp_store, "") + meta = getmetadata(temp_store, "", false) + store = DirectoryStore{meta.zarr_format, meta.dimension_separator}(s) + else + store = temp_store + end + return store, "" else throw(ArgumentError("Path $s is not a directory.")) end From 2b3bbb266a91a0e677513dcbdc39af9f24a87ca9 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Tue, 25 Mar 2025 20:48:45 -0400 Subject: [PATCH 07/16] Implement VersionStorage wrapper rather than modifying AbstractStorage --- src/Storage/Storage.jl | 96 ++++------------------------------- src/Storage/consolidated.jl | 12 ++--- src/Storage/dictstore.jl | 6 +-- src/Storage/directorystore.jl | 8 ++- src/Storage/gcstore.jl | 9 ++-- src/Storage/http.jl | 25 ++++----- src/Storage/s3store.jl | 13 ++--- src/Storage/zipstore.jl | 9 ++-- src/ZArray.jl | 14 ++--- src/ZGroup.jl | 8 +-- test/runtests.jl | 12 ++--- test/storage.jl | 21 ++++++-- 12 files changed, 74 insertions(+), 159 deletions(-) diff --git a/src/Storage/Storage.jl b/src/Storage/Storage.jl index de40d32e..30ff1e15 100644 --- a/src/Storage/Storage.jl +++ b/src/Storage/Storage.jl @@ -2,29 +2,12 @@ # Defines different storages for zarr arrays. Currently only regular files (DirectoryStore) # and Dictionaries are supported -# Default Zarr version -const DV = 2 - -# Default Zarr separator - -# Default Zarr v2 separator -const DS2 = '.' -# Default Zarr v3 separator -const DS3 = '/' - -default_sep(version) = version == 2 ? 
DS2 : DS3 -const DS = default_sep(DV) - """ - abstract type AbstractStore{V,S} + abstract type AbstractStore This the abstract supertype for all Zarr store implementations. Currently only regular files ([`DirectoryStore`](@ref)) and Dictionaries are supported. -# Type Parameters -V is the version, either 2 or 3 -S is the dimension separator - ## Interface All subtypes of `AbstractStore` must implement the following methods: @@ -41,7 +24,7 @@ They may optionally implement the following methods: - [`store_read_strategy(s::AbstractStore)`](@ref store_read_strategy): return the read strategy for the given store. See [`SequentialRead`](@ref) and [`ConcurrentRead`](@ref). """ -abstract type AbstractStore{V,S} end +abstract type AbstractStore end #Define the interface """ @@ -87,18 +70,17 @@ function subkeys end Deletes the given key from the store. """ -@inline citostring(i::CartesianIndex, version::Int=DV, sep::Char=default_sep(version)) = (version == 3 ? "c$sep" : "" ) * join(reverse((i - oneunit(i)).I), sep) -@inline citostring(::CartesianIndex{0}, version::Int=DV, sep::Char=default_sep(version)) = (version == 3 ? "c$(sep)0" : "0" ) -citostring(i::CartesianIndex, s::AbstractStore{V, S}) where {V,S} = citostring(i, V, S) +citostring(i::CartesianIndex) = join(reverse((i - oneunit(i)).I), '.') +citostring(::CartesianIndex{0}) = "0" _concatpath(p,s) = isempty(p) ? 
s : rstrip(p,'/') * '/' * s -Base.getindex(s::AbstractStore, p, i::CartesianIndex) = s[p, citostring(i, s)] +Base.getindex(s::AbstractStore, p, i::CartesianIndex) = s[p, citostring(i)] Base.getindex(s::AbstractStore, p, i) = s[_concatpath(p,i)] -Base.delete!(s::AbstractStore, p, i::CartesianIndex) = delete!(s, p, citostring(i, s)) +Base.delete!(s::AbstractStore, p, i::CartesianIndex) = delete!(s, p, citostring(i)) Base.delete!(s::AbstractStore, p, i) = delete!(s, _concatpath(p,i)) Base.haskey(s::AbstractStore, k) = isinitialized(s,k) Base.setindex!(s::AbstractStore,v,p,i) = setindex!(s,v,_concatpath(p,i)) -Base.setindex!(s::AbstractStore,v,p,i::CartesianIndex) = s[p, citostring(i, s)]=v +Base.setindex!(s::AbstractStore,v,p,i::CartesianIndex) = s[p, citostring(i)]=v maybecopy(x) = copy(x) @@ -129,7 +111,7 @@ end is_zgroup(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zgroup")) is_zarray(s::AbstractStore, p) = isinitialized(s,_concatpath(p,".zarray")) -isinitialized(s::AbstractStore, p, i::CartesianIndex) = isinitialized(s,p,citostring(i, s)) +isinitialized(s::AbstractStore, p, i::CartesianIndex)=isinitialized(s,p,citostring(i)) isinitialized(s::AbstractStore, p, i) = isinitialized(s,_concatpath(p,i)) isinitialized(s::AbstractStore, i) = s[i] !== nothing @@ -215,19 +197,7 @@ isemptysub(s::AbstractStore, p) = isempty(subkeys(s,p)) && isempty(subdirs(s,p)) #during auto-check of storage format when doing zopen storageregexlist = Pair[] -function Base.getproperty(store::AbstractStore{V,S}, sym::Symbol) where {V,S} - if sym == :dimension_separator - return S - elseif sym == :zarr_format - return V - else - return getfield(store, sym) - end -end -function Base.propertynames(store::AbstractStore) - return (:dimension_separator, :version, getfields(store)...) 
-end - +include("versionedstore.jl") include("directorystore.jl") include("dictstore.jl") include("s3store.jl") @@ -235,51 +205,3 @@ include("gcstore.jl") include("consolidated.jl") include("http.jl") include("zipstore.jl") - -# Itemize subtypes of AbstractStore for code generation below -const KnownAbstractStores = (DirectoryStore, GCStore, S3Store, ConsolidatedStore, DictStore, HTTPStore, ZipStore) - -""" - Zarr.set_dimension_separator(::AbstractStore{V}, sep::Char)::AbstractStore{V,sep} - -Returns an AbstractStore of the same type with the same `zarr_format` parameter, `V`, -but with a dimension separator of `sep`. - -# Examples - -``` -julia> Zarr.set_dimension_separator(Zarr.DictStore{2, '.'}(), '/') |> typeof -Zarr.DictStore{2, '/'} -``` - -""" -set_dimension_separator - -""" - set_zarr_format(::AbstractStore{<: Any, S}, zarr_format::Int)::AbstractStore{zarr_format,S} - -Returns an AbstractStore of the same type with the same `dimension_separator` parameter, `S`, -but with the specified `zarr_format` parameter. - -# Examples - -``` -julia> Zarr.set_zarr_format(Zarr.DictStore{2, '.'}(), 3) |> typeof -Zarr.DictStore{3, '.'} -``` - -""" -set_zarr_format - -for T in KnownAbstractStores - e = quote - # copy constructor to change zarr_format and dimension_separator parameters - (::Type{$T{V,S}})(store::$T) where {V,S} = - $T{V,S}(ntuple(i->getfield(store, i), nfields(store))...) - set_dimension_separator(store::$T{V}, sep::Char) where V = - $T{V,sep}(ntuple(i->getfield(store, i), nfields(store))...) - set_zarr_format(store::$T{<: Any, S}, zarr_format::Int) where S = - $T{zarr_format,S}(ntuple(i->getfield(store, i), nfields(store))...) - end - eval(e) -end diff --git a/src/Storage/consolidated.jl b/src/Storage/consolidated.jl index 2429622c..0b28f553 100644 --- a/src/Storage/consolidated.jl +++ b/src/Storage/consolidated.jl @@ -3,24 +3,18 @@ A store that wraps any other AbstractStore but has access to the consolidated me stored in the .zmetadata key. 
Whenever data attributes or metadata are accessed, the data will be read from the dictionary instead. """ -struct ConsolidatedStore{V,S,P} <: AbstractStore{V,S} +struct ConsolidatedStore{P} <: AbstractStore parent::P path::String cons::Dict{String,Any} end -function ConsolidatedStore{V,S}(s::AbstractStore, p) where {V,S} +function ConsolidatedStore(s::AbstractStore, p) d = s[p, ".zmetadata"] if d === nothing throw(ArgumentError("Could not find consolidated metadata for store $s")) end - ConsolidatedStore{V, S, typeof(s)}(s,p,JSON.parse(String(Zarr.maybecopy(d)))["metadata"]) + ConsolidatedStore(s,p,JSON.parse(String(Zarr.maybecopy(d)))["metadata"]) end -ConsolidatedStore{V}(s::AbstractStore, p) where V = ConsolidatedStore{V, default_sep(V)}(s, p) -ConsolidatedStore(s::AbstractStore, p) = ConsolidatedStore{DV,DS}(s, p) - -ConsolidatedStore(s::AbstractStore, p, d) = ConsolidatedStore{DV, DS}(s,p,d) -ConsolidatedStore{V}(s::AbstractStore, p, d) where V = ConsolidatedStore{V, default_sep(V)}(s,p,d) -ConsolidatedStore{V,S}(s::AbstractStore, p, d) where {V,S} = ConsolidatedStore{V, default_sep(V), typeof(s)}(s,p,d) function Base.show(io::IO,d::ConsolidatedStore) b = IOBuffer() diff --git a/src/Storage/dictstore.jl b/src/Storage/dictstore.jl index f6598971..7815ed20 100644 --- a/src/Storage/dictstore.jl +++ b/src/Storage/dictstore.jl @@ -1,10 +1,8 @@ # Stores data in a simple dict in memory -struct DictStore{V,S} <: AbstractStore{V,S} +struct DictStore <: AbstractStore a::Dict{String,Vector{UInt8}} - DictStore{V,S}(a=Dict{String,Vector{UInt8}}()) where {V,S} = new{V,S}(a) end -DictStore(a=Dict{String,Vector{UInt8}}()) = DictStore{DV,DS}(a) -DictStore{V}(a=Dict{String,Vector{UInt8}}()) where V = DictStore{V, default_sep(V)}(a) +DictStore() = DictStore(Dict{String,Vector{UInt8}}()) Base.show(io::IO,d::DictStore) = print(io,"Dictionary Storage") function _pdict(d::DictStore,p) diff --git a/src/Storage/directorystore.jl b/src/Storage/directorystore.jl index 0b7f6434..6ded94fb 
100644 --- a/src/Storage/directorystore.jl +++ b/src/Storage/directorystore.jl @@ -9,14 +9,12 @@ function normalize_path(p::AbstractString) end # Stores files in a regular file system -struct DirectoryStore{V,S} <: AbstractStore{V,S} +struct DirectoryStore <: AbstractStore folder::String - function DirectoryStore{V,S}(p) where {V,S} + function DirectoryStore(p) mkpath(normalize_path(p)) - new{V,S}(normalize_path(p)) + new(normalize_path(p)) end - DirectoryStore(p) = DirectoryStore{DV,DS}(p) - DirectoryStore{V}(p) where V = DirectoryStore{V, default_sep(V)}(p) end function Base.getindex(d::DirectoryStore, i::String) diff --git a/src/Storage/gcstore.jl b/src/Storage/gcstore.jl index 5f0860a2..5f85820d 100644 --- a/src/Storage/gcstore.jl +++ b/src/Storage/gcstore.jl @@ -56,10 +56,10 @@ function _gcs_request_headers() return headers end -struct GCStore{V,S} <: AbstractStore{V,S} +struct GCStore <: AbstractStore bucket::String - function GCStore{V,S}(url::String) where {V,S} + function GCStore(url::String) uri = URI(url) if uri.scheme == "gs" @@ -71,8 +71,6 @@ struct GCStore{V,S} <: AbstractStore{V,S} @debug "GCS bucket: $bucket" new(bucket) end - GCStore(url::String) = GCStore{DV,DS}(url) - GCStore{V}(url::String) where V = GCStore{V, default_sep(V)}(url) end @@ -137,7 +135,6 @@ pushfirst!(storageregexlist,r"^http://storage.googleapis.com"=>GCStore) push!(storageregexlist,r"^gs://"=>GCStore) function storefromstring(::Type{<:GCStore}, url,_) - # TODO: Check metadata for version and dimension separator uri = URI(url) if uri.scheme == "gs" p = lstrip(uri.path,'/') @@ -150,4 +147,4 @@ function storefromstring(::Type{<:GCStore}, url,_) return GCStore(url),p end -store_read_strategy(::GCStore) = ConcurrentRead(concurrent_io_tasks[]) +store_read_strategy(::GCStore) = ConcurrentRead(concurrent_io_tasks[]) \ No newline at end of file diff --git a/src/Storage/http.jl b/src/Storage/http.jl index e213642a..523d701f 100644 --- a/src/Storage/http.jl +++ b/src/Storage/http.jl @@ 
-10,13 +10,11 @@ datasets being served through the [xpublish](https://xpublish.readthedocs.io/en/ python package. In case you experience performance issues, one can try to use `HTTP.set_default_connection_limit!` to increase the number of concurrent connections. """ -struct HTTPStore{V,S} <: AbstractStore{V,S} +struct HTTPStore <: AbstractStore url::String allowed_codes::Set{Int} - HTTPStore{V,S}(url, allowed_codes = Set((404,))) where {V,S} = new{V,S}(url, allowed_codes) + HTTPStore(url, allowed_codes = Set((404,))) = new(url, allowed_codes) end -HTTPStore(url) = HTTPStore{DV, DS}(url) -HTTPStore{V}(url) where V = HTTPStore{V, default_sep(V)}(url) function Base.getindex(s::HTTPStore, k::String) r = HTTP.request("GET",string(s.url,"/",k),status_exception = false,socket_type_tls=OpenSSL.SSLStream) @@ -43,15 +41,18 @@ push!(storageregexlist,r"^https://"=>HTTPStore) push!(storageregexlist,r"^http://"=>HTTPStore) function storefromstring(::Type{<:HTTPStore}, s,_) http_store = HTTPStore(s) - if is_zarray(http_store, "") - meta = getmetadata(http_store, "", false) - http_store = HTTPStore{meta.zarr_format, meta.dimension_separator}(s) - end - if http_store["", ".zmetadata"] !== nothing - return ConsolidatedStore(http_store,""),"" - else - return http_store,"" + try + if is_zarray(http_store, "") + meta = getmetadata(http_store, "", false) + http_store = VersionedStore{meta.zarr_format, meta.dimension_separator}(http_store) + end + if http_store["", ".zmetadata"] !== nothing + return ConsolidatedStore(http_store,""),"" + end + catch err + @warn exception=err "Additional metadata was not available for HTTPStore." 
end + return http_store,"" end """ diff --git a/src/Storage/s3store.jl b/src/Storage/s3store.jl index 03681fa3..aaab004f 100644 --- a/src/Storage/s3store.jl +++ b/src/Storage/s3store.jl @@ -1,23 +1,19 @@ using AWSS3: AWSS3, s3_put, s3_get, s3_delete, s3_list_objects, s3_exists -struct S3Store{V,S} <: AbstractStore{V,S} +struct S3Store <: AbstractStore bucket::String aws::AWSS3.AWS.AbstractAWSConfig end -function S3Store{V,S}(bucket::String; +function S3Store(bucket::String; aws = nothing, - ) where {V,S} + ) if aws === nothing aws = AWSS3.AWS.global_aws_config() end - S3Store{V,S}(bucket, aws) + S3Store(bucket, aws) end -S3Store(bucket, aws) = S3Store{DV,DS}(bucket, aws) -S3Store{V}(bucket, aws) where V = S3Store{V, default_sep(V)}(bucket, aws) -S3Store(bucket; aws = nothing) = S3Store{DV, DS}(bucket; aws) -S3Store{V}(bucket; aws = nothing) where V = S3Store{V, default_sep(V)}(bucket; aws) Base.show(io::IO,::S3Store) = print(io,"S3 Object Storage") @@ -78,7 +74,6 @@ allstrings(v,prefixkey) = [rstrip(String(v[prefixkey]),'/')] push!(storageregexlist,r"^s3://"=>S3Store) function storefromstring(::Type{<:S3Store}, s, _) - # TODO: Check metadata for version and dimension separator decomp = split(s,"/",keepempty=false) bucket = decomp[2] path = join(decomp[3:end],"/") diff --git a/src/Storage/zipstore.jl b/src/Storage/zipstore.jl index 8fb5aca0..8e8bbd27 100644 --- a/src/Storage/zipstore.jl +++ b/src/Storage/zipstore.jl @@ -5,14 +5,13 @@ import ZipArchives A read only store that wraps an `AbstractVector{UInt8}` that contains a zip file. 
""" -struct ZipStore{V, S, T <: AbstractVector{UInt8}} <: AbstractStore{V, S} +struct ZipStore{T <: AbstractVector{UInt8}} <: AbstractStore r::ZipArchives.ZipBufferReader{T} - ZipStore{V,S}(data::T) where {V,S,T} = new{V, S, T}(ZipArchives.ZipBufferReader(data)) - ZipStore{V}(data::AbstractVector{UInt8}) where V = ZipStore{V, default_sep(V)}(data) - ZipStore(data::AbstractVector{UInt8}) = ZipStore{DV,DS}(data) end +ZipStore(data::AbstractVector{UInt8}) = ZipStore(ZipArchives.ZipBufferReader(data)) + Base.show(io::IO,::ZipStore) = print(io,"Read Only Zip Storage") function Base.getindex(d::ZipStore, k::AbstractString)::Union{Nothing, Vector{UInt8}} @@ -95,4 +94,4 @@ function _writezip(w::ZipArchives.ZipWriter, s::AbstractStore, p::String) for subdir in subdirs(s, p) _writezip(w, s, _make_prefix(p)*subdir) end -end +end \ No newline at end of file diff --git a/src/ZArray.jl b/src/ZArray.jl index 0aa472b4..961c798f 100644 --- a/src/ZArray.jl +++ b/src/ZArray.jl @@ -326,14 +326,14 @@ function zcreate(::Type{T}, dims::Integer...; end if path===nothing - store = DictStore{DV, dimension_separator}() + store = VersionedStore{DV, dimension_separator}(DictStore()) else - store = DirectoryStore{DV, dimension_separator}(joinpath(path,name)) + store = VersionedStore{DV, dimension_separator}(DirectoryStore(joinpath(path,name))) end zcreate(T, store, dims...; kwargs...) 
end -function zcreate(::Type{T},storage::AbstractStore{<: Any,S}, +function zcreate(::Type{T},storage::AbstractStore, dims...; path = "", chunks=dims, @@ -345,13 +345,13 @@ function zcreate(::Type{T},storage::AbstractStore{<: Any,S}, writeable=true, indent_json=false, dimension_separator=nothing - ) where {T,S} + ) where {T} if isnothing(dimension_separator) - dimension_separator = S - elseif dimension_separator != S + dimension_separator = Zarr.dimension_separator(storage) + elseif dimension_separator != Zarr.dimension_separator(storage) error("The dimension separator keyword value, $dimension_separator, - must agree with the dimension separator type parameter, $S") + must agree with the dimension separator type parameter, $(Zarr.dimension_separator(storage))") end length(dims) == length(chunks) || throw(DimensionMismatch("Dims must have the same length as chunks")) diff --git a/src/ZGroup.jl b/src/ZGroup.jl index d0752ec0..e4d2100b 100644 --- a/src/ZGroup.jl +++ b/src/ZGroup.jl @@ -23,7 +23,7 @@ function ZGroup(s::T,mode="r",path="";fill_as_missing=false) where T <: Abstract subpath = _concatpath(path,dshort) if is_zarray(s, subpath) meta = getmetadata(s, subpath, false) - if s.dimension_separator != meta.dimension_separator + if dimension_separator(s) != meta.dimension_separator s = set_dimension_separator(s, meta.dimension_separator) end m = zopen_noerr(s,mode,path=_concatpath(path,dshort),fill_as_missing=fill_as_missing) @@ -123,15 +123,15 @@ function storefromstring(s, create=true) end end if create - return DirectoryStore(s), "" + return VersionedStore(DirectoryStore(s)), "" elseif isdir(s) # parse metadata to determine store kind temp_store = DirectoryStore(s) if is_zarray(temp_store, "") meta = getmetadata(temp_store, "", false) - store = DirectoryStore{meta.zarr_format, meta.dimension_separator}(s) + store = VersionedStore{meta.zarr_format, meta.dimension_separator}(temp_store) else - store = temp_store + store = VersionedStore(temp_store) end return 
store, "" else diff --git a/test/runtests.jl b/test/runtests.jl index 34790c9b..035ca33f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,11 +15,11 @@ CondaPkg.add("zarr"; version="2.*") @testset "fields" begin z = zzeros(Int64, 2, 3) @test z isa ZArray{Int64, 2, Zarr.BloscCompressor, - Zarr.DictStore{2, '.'}} + Zarr.VersionedStore{2, '.', Zarr.DictStore}} - @test length(z.storage.a) === 3 - @test length(z.storage.a["0.0"]) === 64 - @test eltype(z.storage.a["0.0"]) === UInt8 + @test length(z.storage.parent.a) === 3 + @test length(z.storage.parent.a["0.0"]) === 64 + @test eltype(z.storage.parent.a["0.0"]) === UInt8 @test z.metadata.shape[] === (2, 3) @test z.metadata.order === 'C' @test z.metadata.chunks === (2, 3) @@ -40,7 +40,7 @@ CondaPkg.add("zarr"; version="2.*") @testset "methods" begin z = zzeros(Int64, 2, 3) @test z isa ZArray{Int64, 2, Zarr.BloscCompressor, - Zarr.DictStore{2, '.'}} + Zarr.VersionedStore{2, '.', Zarr.DictStore}} @test eltype(z) === Int64 @test ndims(z) === 2 @@ -60,7 +60,7 @@ CondaPkg.add("zarr"; version="2.*") compressor=Zarr.NoCompressor()) @test z.metadata.compressor === Zarr.NoCompressor() - @test z.storage === Zarr.DirectoryStore("$dir/$name") + @test z.storage === Zarr.VersionedStore{2 ,'.'}(Zarr.DirectoryStore("$dir/$name")) @test isdir("$dir/$name") @test ispath("$dir/$name/.zarray") @test ispath("$dir/$name/.zattrs") diff --git a/test/storage.jl b/test/storage.jl index 1a73ad10..be97f402 100644 --- a/test/storage.jl +++ b/test/storage.jl @@ -32,7 +32,7 @@ end """ Function to test the interface of AbstractStore. Every complete implementation should pass this test. 
""" -function test_store_common(ds::Zarr.AbstractStore{V,S}) where {V,S} +function test_store_common(ds::Zarr.AbstractStore) @test !Zarr.is_zgroup(ds,"") ds[".zgroup"]=rand(UInt8,50) @test haskey(ds,".zgroup") @@ -52,6 +52,8 @@ function test_store_common(ds::Zarr.AbstractStore{V,S}) where {V,S} @test Zarr.subdirs(ds,"bar") == String[] #Test getindex and setindex data = rand(UInt8,50) + V = Zarr.zarr_format(ds) + S = Zarr.dimension_separator(ds) first_ci_str = Zarr.citostring(CartesianIndex(1,1,1), V, S) second_ci_str = Zarr.citostring(CartesianIndex(2,1,1), V, S) ds["bar/" * first_ci_str] = data @@ -185,6 +187,15 @@ end run(s, wait=false) cfg = MinioConfig("http://localhost:9001") Zarr.AWSS3.global_aws_config(cfg) + # Try to communicate with the server for 10 seconds + for i in 1:10 + try + s3_list_objects(cfg) + break + catch err + sleep(1) + end + end Zarr.AWSS3.S3.create_bucket("zarrdata") ds = S3Store("zarrdata") test_store_common(ds) @@ -256,10 +267,10 @@ end g = zgroup(s, attrs = Dict("groupatt"=>5)) a = zcreate(Int,g,"a",10,20,chunks=(5,5),attrs=Dict("arratt"=>2.5),fill_value = -1) @async HTTP.serve(Zarr.zarr_req_handler(s,g.path,403),ip,port,server=server) - g3 = zopen("http://$ip:$port") - @test_throws "Received error code 403" g3["a"][:,:] - Zarr.missing_chunk_return_code!(g3.storage,403) - @test all(==(-1),g3["a"][:,:]) + @test_throws "Received error code 403" zopen("http://$ip:$port") + # @test_throws "Received error code 403" g3["a"][:,:] + # Zarr.missing_chunk_return_code!(g3.storage,403) + # @test all(==(-1),g3["a"][:,:]) close(server) end From 5f35ebf9b29456322812378b10d4b04cff73a10c Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 31 Mar 2025 18:29:28 -0400 Subject: [PATCH 08/16] Fix ConslidatedStore wrapper around HTTP This reduces the test diff --- src/Storage/http.jl | 6 +++--- test/storage.jl | 17 ++++------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/src/Storage/http.jl b/src/Storage/http.jl index 
523d701f..1263f8c2 100644 --- a/src/Storage/http.jl +++ b/src/Storage/http.jl @@ -42,13 +42,13 @@ push!(storageregexlist,r"^http://"=>HTTPStore) function storefromstring(::Type{<:HTTPStore}, s,_) http_store = HTTPStore(s) try + if http_store["", ".zmetadata"] !== nothing + http_store = ConsolidatedStore(http_store,"") + end if is_zarray(http_store, "") meta = getmetadata(http_store, "", false) http_store = VersionedStore{meta.zarr_format, meta.dimension_separator}(http_store) end - if http_store["", ".zmetadata"] !== nothing - return ConsolidatedStore(http_store,""),"" - end catch err @warn exception=err "Additional metadata was not available for HTTPStore." end diff --git a/test/storage.jl b/test/storage.jl index be97f402..320239e9 100644 --- a/test/storage.jl +++ b/test/storage.jl @@ -187,15 +187,6 @@ end run(s, wait=false) cfg = MinioConfig("http://localhost:9001") Zarr.AWSS3.global_aws_config(cfg) - # Try to communicate with the server for 10 seconds - for i in 1:10 - try - s3_list_objects(cfg) - break - catch err - sleep(1) - end - end Zarr.AWSS3.S3.create_bucket("zarrdata") ds = S3Store("zarrdata") test_store_common(ds) @@ -267,10 +258,10 @@ end g = zgroup(s, attrs = Dict("groupatt"=>5)) a = zcreate(Int,g,"a",10,20,chunks=(5,5),attrs=Dict("arratt"=>2.5),fill_value = -1) @async HTTP.serve(Zarr.zarr_req_handler(s,g.path,403),ip,port,server=server) - @test_throws "Received error code 403" zopen("http://$ip:$port") - # @test_throws "Received error code 403" g3["a"][:,:] - # Zarr.missing_chunk_return_code!(g3.storage,403) - # @test all(==(-1),g3["a"][:,:]) + g3 = zopen("http://$ip:$port") + @test_throws "Received error code 403" g3["a"][:,:] + Zarr.missing_chunk_return_code!(g3.storage,403) + @test all(==(-1),g3["a"][:,:]) close(server) end From c685387a1b9c18e4377020b2742a231b4a62626a Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 31 Mar 2025 18:43:40 -0400 Subject: [PATCH 09/16] Add getproperty forwarding from VersionedStorage This also reduces the 
test diff --- test/runtests.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 035ca33f..a5454557 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -17,9 +17,9 @@ CondaPkg.add("zarr"; version="2.*") @test z isa ZArray{Int64, 2, Zarr.BloscCompressor, Zarr.VersionedStore{2, '.', Zarr.DictStore}} - @test length(z.storage.parent.a) === 3 - @test length(z.storage.parent.a["0.0"]) === 64 - @test eltype(z.storage.parent.a["0.0"]) === UInt8 + @test length(z.storage.a) === 3 + @test length(z.storage.a["0.0"]) === 64 + @test eltype(z.storage.a["0.0"]) === UInt8 @test z.metadata.shape[] === (2, 3) @test z.metadata.order === 'C' @test z.metadata.chunks === (2, 3) From 8d5606d6acc0e6b9fbc443bc13aded4456536063 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 31 Mar 2025 19:05:10 -0400 Subject: [PATCH 10/16] Add some tests for propertynames --- src/metadata.jl | 1 + test/runtests.jl | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/metadata.jl b/src/metadata.jl index bfadfb03..1391a131 100644 --- a/src/metadata.jl +++ b/src/metadata.jl @@ -152,6 +152,7 @@ function Base.getproperty(m::DimensionSeparatedMetadata{S}, name::Symbol) where end return getfield(m, name) end +Base.propertynames(m::Metadata) = (fieldnames(Metadata)..., :dimension_separator) #To make unit tests pass with ref shape import Base.== diff --git a/test/runtests.jl b/test/runtests.jl index a5454557..ad434fe1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -17,6 +17,7 @@ CondaPkg.add("zarr"; version="2.*") @test z isa ZArray{Int64, 2, Zarr.BloscCompressor, Zarr.VersionedStore{2, '.', Zarr.DictStore}} + @test :a ∈ propertynames(z.storage) @test length(z.storage.a) === 3 @test length(z.storage.a["0.0"]) === 64 @test eltype(z.storage.a["0.0"]) === UInt8 @@ -31,6 +32,8 @@ CondaPkg.add("zarr"; version="2.*") @test z.metadata.compressor.shuffle === 1 @test z.attrs == Dict{Any, Any}() @test z.writeable === true + @test 
z.metadata.dimension_separator === Zarr.DS + @test :dimension_separator ∈ propertynames(z.metadata) @test_throws ArgumentError zzeros(Int64,2,3, chunks = (0,1)) @test_throws ArgumentError zzeros(Int64,0,-1) @test_throws ArgumentError Zarr.Metadata(zeros(2,2), (2,2), zarr_format = 3) From a6fcc2b7837d0f45169760ce03ec67dce6786610 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 31 Mar 2025 19:11:10 -0400 Subject: [PATCH 11/16] Add Storage/versionstore.jl --- src/Storage/versionedstore.jl | 124 ++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 src/Storage/versionedstore.jl diff --git a/src/Storage/versionedstore.jl b/src/Storage/versionedstore.jl new file mode 100644 index 00000000..cec72ee0 --- /dev/null +++ b/src/Storage/versionedstore.jl @@ -0,0 +1,124 @@ +# Default Zarr version +const DV = 2 + +# Default Zarr separator + +# Default Zarr v2 separator +const DS2 = '.' +# Default Zarr v3 separator +const DS3 = '/' + +default_sep(version) = version == 2 ? DS2 : DS3 +const DS = default_sep(DV) + +struct VersionedStore{V,SEP,STORE <: AbstractStore} <: AbstractStore + parent::STORE +end +VersionedStore(args...) = VersionedStore{DV,DS}(args...) +VersionedStore{V}(args...) where V = VersionedStore{V, default_sep(V)}(args...) +VersionedStore{<: Any, S}(args...) where S = VersionedStore{DV, S}(args...) +function VersionedStore{V,S}(store::AbstractStore) where {V,S} + return VersionedStore{V,S,typeof(store)}(store) +end +function VersionedStore{V,S}(store::VersionedStore) where {V,S} + p = parent(store) + return VersionedStore{V,S,typeof(p)}(p) +end + +Base.parent(store::VersionedStore) = store.parent + +@inline citostring(i::CartesianIndex, version::Int, sep::Char=default_sep(version)) = (version == 3 ? "c$sep" : "" ) * join(reverse((i - oneunit(i)).I), sep) +@inline citostring(::CartesianIndex{0}, version::Int, sep::Char=default_sep(version)) = (version == 3 ? 
"c$(sep)0" : "0" ) +citostring(i::CartesianIndex, s::VersionedStore{V, S}) where {V,S} = citostring(i, V, S) + +Base.getindex(s::VersionedStore, p, i::CartesianIndex) = s[p, citostring(i,s)] +Base.delete!(s::VersionedStore, p, i::CartesianIndex) = delete!(s, p, citostring(i,s)) +Base.setindex!(s::VersionedStore, v, p, i::CartesianIndex) = s[p, citostring(i,s)]=v + +isinitialized(s::VersionedStore, p, i::CartesianIndex) = isinitialized(s,p,citostring(i, s)) + +""" +- [`storagesize(d::AbstractStore, p::AbstractString)`](@ref storagesize) +- [`subdirs(d::AbstractStore, p::AbstractString)`](@ref subdirs) +- [`subkeys(d::AbstractStore, p::AbstractString)`](@ref subkeys) +- [`isinitialized(d::AbstractStore, p::AbstractString)`](@ref isinitialized) +- [`storefromstring(::Type{<: AbstractStore}, s, _)`](@ref storefromstring) +- `Base.getindex(d::AbstractStore, i::AbstractString)`: return the data stored in key `i` as a Vector{UInt8} +- `Base.setindex!(d::AbstractStore, v, i::AbstractString)`: write the values in `v` to the key `i` of the given store `d` +""" + +storagesize(d::VersionedStore, p::AbstractString) = storagesize(parent(d), p) +subdirs(d::VersionedStore, p::AbstractString) = subdirs(parent(d), p) +subkeys(d::VersionedStore, p::AbstractString) = subkeys(parent(d), p) +isinitialized(d::VersionedStore, p::AbstractString) = isinitialized(parent(d), p) +storefromstring(::Type{VersionedStore{<: Any, <: Any, STORE}}, s, _) where STORE = VersionedStore{DV,DS}(storefromstring(STORE, s)) +storefromstring(::Type{VersionedStore{V,S}}, s, _) where {V,S} = VersionedStore{DV,DS}(storefromstring(s)) +storefromstring(::Type{VersionedStore{V,S,STORE}}, s, _) where {V,S,STORE} = VersionedStore{V,S,STORE}(storefromstring(STORE, s)) +Base.getindex(d::VersionedStore, i::AbstractString) = getindex(parent(d), i) +Base.setindex!(d::VersionedStore, v, i::AbstractString) = setindex!(parent(d), v, i) +Base.delete!(d::VersionedStore, i::AbstractString) = delete!(parent(d), i) + + +function 
Base.getproperty(store::VersionedStore{V,S}, sym::Symbol) where {V,S} + if sym == :dimension_separator + return S + elseif sym == :zarr_format + return V + elseif sym ∈ propertynames(getfield(store, :parent)) + # Support forwarding of properties to parent + return getproperty(store.parent, sym) + else + getfield(store, sym) + end +end +function Base.propertynames(store::VersionedStore) + return (:dimension_separator, :zarr_format, fieldnames(typeof(store))..., propertynames(store.parent)...) +end + + +""" + Zarr.set_dimension_separator(store::VersionedStore{V}, sep::Char)::VersionedStore{V,sep} + +Returns a VersionedStore of the same type with the same `zarr_format` parameter, `V`, +but with a dimension separator of `sep`. Note that this does not mutate the original store. + +# Examples + +``` +julia> Zarr.set_dimension_separator(Zarr.VersionedStore{2, '.'}(Zarr.DictStore(), '/')) |> typeof +Zarr.VersionedStore{2, '/',Zarr.DictStore} +``` + +""" +function set_dimension_separator(store::VersionedStore{V}, sep::Char) where V + return VersionedStore{V,sep}(store) +end +function set_dimension_separator(store::AbstractStore, sep::Char) + return VersionedStore{<: Any,sep}(store) +end + +""" + set_zarr_format(::VersionedStore{<: Any, S}, zarr_format::Int)::VersionedStore{zarr_format,S} + +Returns a VersionedStore of the same type with the same `dimension_separator` parameter, `S`, +but with the specified `zarr_format` parameter. Note that this does not mutate the original store. 
+ +# Examples + +``` +julia> Zarr.set_zarr_format(Zarr.VersionedStore{2, '.'}(Zarr.DictStore(), 3)) |> typeof +Zarr.VersionedStore{3, '.', DictStore} +``` + +""" +function set_zarr_format(store::VersionedStore{<: Any, S}, zarr_format::Int) where S + return VersionedStore{zarr_format,S}(store) +end +function set_zarr_format(store::AbstractStore, zarr_format::Int) + return VersionedStore{zarr_format}(store) +end + +dimension_separator(::AbstractStore) = DS +dimension_separator(::VersionedStore{<: Any,S}) where S = S +zarr_format(::AbstractStore) = DV +zarr_format(::VersionedStore{V}) where V = V From f6883f808aef2481fa1191568540f6008f884af7 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 31 Mar 2025 23:42:33 -0400 Subject: [PATCH 12/16] Add VersionedStorage param change constructors --- src/Storage/versionedstore.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storage/versionedstore.jl b/src/Storage/versionedstore.jl index cec72ee0..17aeaf0a 100644 --- a/src/Storage/versionedstore.jl +++ b/src/Storage/versionedstore.jl @@ -15,8 +15,11 @@ struct VersionedStore{V,SEP,STORE <: AbstractStore} <: AbstractStore parent::STORE end VersionedStore(args...) = VersionedStore{DV,DS}(args...) +VersionedStore(s::VersionedStore) = s VersionedStore{V}(args...) where V = VersionedStore{V, default_sep(V)}(args...) +VersionedStore{V}(s::VersionedStore{<:Any,S}) where {V,S} = VersionedStore{V, S}(s) VersionedStore{<: Any, S}(args...) where S = VersionedStore{DV, S}(args...) 
+VersionedStore{<: Any, S}(s::VersionedStore{V}) where {V,S} = VersionedStore{V, S}(s) function VersionedStore{V,S}(store::AbstractStore) where {V,S} return VersionedStore{V,S,typeof(store)}(store) end From 3cf746da8a5b9e7fa6c6a3ecfe17c0e6cf4eeb50 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Tue, 6 May 2025 18:41:39 -0400 Subject: [PATCH 13/16] Add V2 chunk encoding support --- src/Storage/versionedstore.jl | 10 +++++++++- test/storage.jl | 8 ++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/Storage/versionedstore.jl b/src/Storage/versionedstore.jl index 17aeaf0a..45f2bad8 100644 --- a/src/Storage/versionedstore.jl +++ b/src/Storage/versionedstore.jl @@ -8,9 +8,16 @@ const DS2 = '.' # Default Zarr v3 separator const DS3 = '/' -default_sep(version) = version == 2 ? DS2 : DS3 +default_sep(version) = version == 2 ? DS2 : + version == 3 ? DS3 : + error("Unknown version: $version") const DS = default_sep(DV) +# Chunk Key Encodings for Zarr v3 +# A Char is the separator for the default chunk key encoding +struct V2ChunkKeyEncoding{SEP} end + +# Version store differentiates between Zarr format versions struct VersionedStore{V,SEP,STORE <: AbstractStore} <: AbstractStore parent::STORE end @@ -32,6 +39,7 @@ Base.parent(store::VersionedStore) = store.parent @inline citostring(i::CartesianIndex, version::Int, sep::Char=default_sep(version)) = (version == 3 ? "c$sep" : "" ) * join(reverse((i - oneunit(i)).I), sep) @inline citostring(::CartesianIndex{0}, version::Int, sep::Char=default_sep(version)) = (version == 3 ? 
"c$(sep)0" : "0" ) +@inline citostring(i::CartesianIndex, ::Int, ::Type{V2ChunkKeyEncoding{S}}) where S = citostring(i, 2, S) citostring(i::CartesianIndex, s::VersionedStore{V, S}) where {V,S} = citostring(i, V, S) Base.getindex(s::VersionedStore, p, i::CartesianIndex) = s[p, citostring(i,s)] diff --git a/test/storage.jl b/test/storage.jl index 320239e9..1ef34d72 100644 --- a/test/storage.jl +++ b/test/storage.jl @@ -9,21 +9,29 @@ end @testset "Version and Dimension Separator" begin + v2cke_period = Zarr.V2ChunkKeyEncoding{'.'} + v2cke_slash = Zarr.V2ChunkKeyEncoding{'/'} let ci = CartesianIndex() @test Zarr.citostring(ci, 2, '.') == "0" @test Zarr.citostring(ci, 2, '/') == "0" + @test Zarr.citostring(ci, 3, v2cke_period) == "0" + @test Zarr.citostring(ci, 3, v2cke_slash) == "0" @test Zarr.citostring(ci, 3, '.') == "c.0" @test Zarr.citostring(ci, 3, '/') == "c/0" end let ci = CartesianIndex(1,1,1) @test Zarr.citostring(ci, 2, '.') == "0.0.0" @test Zarr.citostring(ci, 2, '/') == "0/0/0" + @test Zarr.citostring(ci, 3, v2cke_period) == "0.0.0" + @test Zarr.citostring(ci, 3, v2cke_slash) == "0/0/0" @test Zarr.citostring(ci, 3, '.') == "c.0.0.0" @test Zarr.citostring(ci, 3, '/') == "c/0/0/0" end let ci = CartesianIndex(1,3,5) @test Zarr.citostring(ci, 2, '.') == "4.2.0" @test Zarr.citostring(ci, 2, '/') == "4/2/0" + @test Zarr.citostring(ci, 3, v2cke_period) == "4.2.0" + @test Zarr.citostring(ci, 3, v2cke_slash) == "4/2/0" @test Zarr.citostring(ci, 3, '.') == "c.4.2.0" @test Zarr.citostring(ci, 3, '/') == "c/4/2/0" end From d218dc249cd0f238fb364b479fbec675d5a4620a Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Tue, 6 May 2025 20:06:26 -0400 Subject: [PATCH 14/16] Fix Base.UInt8 constructor for ASCIIChar --- src/metadata.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/metadata.jl b/src/metadata.jl index 1391a131..fa564e37 100644 --- a/src/metadata.jl +++ b/src/metadata.jl @@ -17,7 +17,7 @@ using .MaxLengthStrings: MaxLengthString 
primitive type ASCIIChar <: AbstractChar 8 end ASCIIChar(x::UInt8) = reinterpret(ASCIIChar, x) ASCIIChar(x::Integer) = ASCIIChar(UInt8(x)) -UInt8(x::ASCIIChar) = reinterpret(UInt8, x) +Base.UInt8(x::ASCIIChar) = reinterpret(UInt8, x) Base.codepoint(x::ASCIIChar) = UInt8(x) Base.show(io::IO, x::ASCIIChar) = print(io, Char(x)) Base.zero(::Union{ASCIIChar,Type{ASCIIChar}}) = ASCIIChar(Base.zero(UInt8)) From 08288fda4e3d2f7281e9ad2ac8bbb22d6042c678 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 19 May 2025 04:14:10 -0400 Subject: [PATCH 15/16] Change VersionedStore to FormattedStore --- src/Storage/Storage.jl | 2 +- src/Storage/formattedstore.jl | 174 ++++++++++++++++++++++++++++++++++ src/Storage/http.jl | 2 +- src/Storage/versionedstore.jl | 135 -------------------------- src/ZArray.jl | 4 +- src/ZGroup.jl | 6 +- test/runtests.jl | 6 +- 7 files changed, 184 insertions(+), 145 deletions(-) create mode 100644 src/Storage/formattedstore.jl delete mode 100644 src/Storage/versionedstore.jl diff --git a/src/Storage/Storage.jl b/src/Storage/Storage.jl index 30ff1e15..0d17bb1a 100644 --- a/src/Storage/Storage.jl +++ b/src/Storage/Storage.jl @@ -197,7 +197,7 @@ isemptysub(s::AbstractStore, p) = isempty(subkeys(s,p)) && isempty(subdirs(s,p)) #during auto-check of storage format when doing zopen storageregexlist = Pair[] -include("versionedstore.jl") +include("formattedstore.jl") include("directorystore.jl") include("dictstore.jl") include("s3store.jl") diff --git a/src/Storage/formattedstore.jl b/src/Storage/formattedstore.jl new file mode 100644 index 00000000..b60ab2da --- /dev/null +++ b/src/Storage/formattedstore.jl @@ -0,0 +1,174 @@ +# Default Zarr version +const DV = 2 + +# Default Zarr separator + +# Default Zarr v2 separator +const DS2 = '.' +# Default Zarr v3 separator +const DS3 = '/' + +default_sep(version) = version == 2 ? DS2 : + version == 3 ? 
DS3 : + error("Unknown version: $version") +const DS = default_sep(DV) + +# Chunk Key Encodings for Zarr v3 +# A Char is the separator for the default chunk key encoding +abstract type ChunkKeyEncoding end +struct V2ChunkKeyEncoding{SEP} <: ChunkKeyEncoding end + +""" + FormattedStore{V,CKE,STORE <: AbstractStore} <: AbstractStore + +FormattedStore wraps an AbstractStore to indicate a specific Zarr format. +The path of a chunk depends on the version and chunk key encoding. + +# Type Parameters + +- V: Zarr format version +- CKE: Chunk key encoding or dimension separator. + CKE could be a `Char` or a subtype of `ChunkKeyEncoding`. +- STORE: Type of AbstractStore wrapped + +# Chunk Path Formats + +## Zarr version 2 + +### '.' dimension separator (default) + +Chunks are encoded as "1.2.3" + +### '/' dimension separator + +Chunks are encoded as "1/2/3" + +## Zarr version 3 + +### '/' dimension separator (default) + +Chunks are encoded as "c/1/2/3" + +### '.' dimension separator + +Chunks are encoded as "c.1.2.3" + +### V2ChunkKeyEncoding{SEP} + +See Zarr version 2 +""" +struct FormattedStore{V,SEP,STORE <: AbstractStore} <: AbstractStore + parent::STORE +end +FormattedStore(args...) = FormattedStore{DV,DS}(args...) +FormattedStore(s::FormattedStore) = s +FormattedStore{V}(args...) where V = FormattedStore{V, default_sep(V)}(args...) +FormattedStore{V}(s::FormattedStore{<:Any,S}) where {V,S} = FormattedStore{V, S}(s) +FormattedStore{<: Any, S}(args...) where S = FormattedStore{DV, S}(args...) 
+FormattedStore{<: Any, S}(s::FormattedStore{V}) where {V,S} = FormattedStore{V, S}(s) +function FormattedStore{V,S}(store::AbstractStore) where {V,S} + return FormattedStore{V,S,typeof(store)}(store) +end +function FormattedStore{V,S}(store::FormattedStore) where {V,S} + p = parent(store) + return FormattedStore{V,S,typeof(p)}(p) +end + +Base.parent(store::FormattedStore) = store.parent + +@inline citostring(i::CartesianIndex, version::Int, sep::Char=default_sep(version)) = (version == 3 ? "c$sep" : "" ) * join(reverse((i - oneunit(i)).I), sep) +@inline citostring(::CartesianIndex{0}, version::Int, sep::Char=default_sep(version)) = (version == 3 ? "c$(sep)0" : "0" ) +@inline citostring(i::CartesianIndex, ::Int, ::Type{V2ChunkKeyEncoding{S}}) where S = citostring(i, 2, S) +citostring(i::CartesianIndex, s::FormattedStore{V, S}) where {V,S} = citostring(i, V, S) + +Base.getindex(s::FormattedStore, p, i::CartesianIndex) = s[p, citostring(i,s)] +Base.delete!(s::FormattedStore, p, i::CartesianIndex) = delete!(s, p, citostring(i,s)) +Base.setindex!(s::FormattedStore, v, p, i::CartesianIndex) = s[p, citostring(i,s)]=v + +isinitialized(s::FormattedStore, p, i::CartesianIndex) = isinitialized(s,p,citostring(i, s)) + +""" +- [`storagesize(d::AbstractStore, p::AbstractString)`](@ref storagesize) +- [`subdirs(d::AbstractStore, p::AbstractString)`](@ref subdirs) +- [`subkeys(d::AbstractStore, p::AbstractString)`](@ref subkeys) +- [`isinitialized(d::AbstractStore, p::AbstractString)`](@ref isinitialized) +- [`storefromstring(::Type{<: AbstractStore}, s, _)`](@ref storefromstring) +- `Base.getindex(d::AbstractStore, i::AbstractString)`: return the data stored in key `i` as a Vector{UInt8} +- `Base.setindex!(d::AbstractStore, v, i::AbstractString)`: write the values in `v` to the key `i` of the given store `d` +""" + +storagesize(d::FormattedStore, p::AbstractString) = storagesize(parent(d), p) +subdirs(d::FormattedStore, p::AbstractString) = subdirs(parent(d), p) 
+subkeys(d::FormattedStore, p::AbstractString) = subkeys(parent(d), p) +isinitialized(d::FormattedStore, p::AbstractString) = isinitialized(parent(d), p) +storefromstring(::Type{FormattedStore{<: Any, <: Any, STORE}}, s, _) where STORE = FormattedStore{DV,DS}(storefromstring(STORE, s)) +storefromstring(::Type{FormattedStore{V,S}}, s, _) where {V,S} = FormattedStore{DV,DS}(storefromstring(s)) +storefromstring(::Type{FormattedStore{V,S,STORE}}, s, _) where {V,S,STORE} = FormattedStore{V,S,STORE}(storefromstring(STORE, s)) +Base.getindex(d::FormattedStore, i::AbstractString) = getindex(parent(d), i) +Base.setindex!(d::FormattedStore, v, i::AbstractString) = setindex!(parent(d), v, i) +Base.delete!(d::FormattedStore, i::AbstractString) = delete!(parent(d), i) + + +function Base.getproperty(store::FormattedStore{V,S}, sym::Symbol) where {V,S} + if sym == :dimension_separator + return S + elseif sym == :zarr_format + return V + elseif sym ∈ propertynames(getfield(store, :parent)) + # Support forwarding of properties to parent + return getproperty(store.parent, sym) + else + getfield(store, sym) + end +end +function Base.propertynames(store::FormattedStore) + return (:dimension_separator, :zarr_format, fieldnames(typeof(store))..., propertynames(store.parent)...) +end + + +""" + Zarr.set_dimension_separator(store::FormattedStore{V}, sep::Char)::FormattedStore{V,sep} + +Returns a FormattedStore of the same type with the same `zarr_format` parameter, `V`, +but with a dimension separator of `sep`. Note that this does not mutate the original store. 
+ +# Examples + +``` +julia> Zarr.set_dimension_separator(Zarr.FormattedStore{2, '.'}(Zarr.DictStore(), '/')) |> typeof +Zarr.FormattedStore{2, '/',Zarr.DictStore} +``` + +""" +function set_dimension_separator(store::FormattedStore{V}, sep::Char) where V + return FormattedStore{V,sep}(store) +end +function set_dimension_separator(store::AbstractStore, sep::Char) + return FormattedStore{<: Any,sep}(store) +end + +""" + set_zarr_format(::FormattedStore{<: Any, S}, zarr_format::Int)::FormattedStore{zarr_format,S} + +Returns a FormattedStore of the same type with the same `dimension_separator` parameter, `S`, +but with the specified `zarr_format` parameter. Note that this does not mutate the original store. + +# Examples + +``` +julia> Zarr.set_zarr_format(Zarr.FormattedStore{2, '.'}(Zarr.DictStore(), 3)) |> typeof +Zarr.FormattedStore{3, '.', DictStore} +``` + +""" +function set_zarr_format(store::FormattedStore{<: Any, S}, zarr_format::Int) where S + return FormattedStore{zarr_format,S}(store) +end +function set_zarr_format(store::AbstractStore, zarr_format::Int) + return FormattedStore{zarr_format}(store) +end + +dimension_separator(::AbstractStore) = DS +dimension_separator(::FormattedStore{<: Any,S}) where S = S +zarr_format(::AbstractStore) = DV +zarr_format(::FormattedStore{V}) where V = V diff --git a/src/Storage/http.jl b/src/Storage/http.jl index 1263f8c2..980284f2 100644 --- a/src/Storage/http.jl +++ b/src/Storage/http.jl @@ -47,7 +47,7 @@ function storefromstring(::Type{<:HTTPStore}, s,_) end if is_zarray(http_store, "") meta = getmetadata(http_store, "", false) - http_store = VersionedStore{meta.zarr_format, meta.dimension_separator}(http_store) + http_store = FormattedStore{meta.zarr_format, meta.dimension_separator}(http_store) end catch err @warn exception=err "Additional metadata was not available for HTTPStore." 
diff --git a/src/Storage/versionedstore.jl b/src/Storage/versionedstore.jl deleted file mode 100644 index 45f2bad8..00000000 --- a/src/Storage/versionedstore.jl +++ /dev/null @@ -1,135 +0,0 @@ -# Default Zarr version -const DV = 2 - -# Default Zarr separator - -# Default Zarr v2 separator -const DS2 = '.' -# Default Zarr v3 separator -const DS3 = '/' - -default_sep(version) = version == 2 ? DS2 : - version == 3 ? DS3 : - error("Unknown version: $version") -const DS = default_sep(DV) - -# Chunk Key Encodings for Zarr v3 -# A Char is the separator for the default chunk key encoding -struct V2ChunkKeyEncoding{SEP} end - -# Version store differentiates between Zarr format versions -struct VersionedStore{V,SEP,STORE <: AbstractStore} <: AbstractStore - parent::STORE -end -VersionedStore(args...) = VersionedStore{DV,DS}(args...) -VersionedStore(s::VersionedStore) = s -VersionedStore{V}(args...) where V = VersionedStore{V, default_sep(V)}(args...) -VersionedStore{V}(s::VersionedStore{<:Any,S}) where {V,S} = VersionedStore{V, S}(s) -VersionedStore{<: Any, S}(args...) where S = VersionedStore{DV, S}(args...) -VersionedStore{<: Any, S}(s::VersionedStore{V}) where {V,S} = VersionedStore{V, S}(s) -function VersionedStore{V,S}(store::AbstractStore) where {V,S} - return VersionedStore{V,S,typeof(store)}(store) -end -function VersionedStore{V,S}(store::VersionedStore) where {V,S} - p = parent(store) - return VersionedStore{V,S,typeof(p)}(p) -end - -Base.parent(store::VersionedStore) = store.parent - -@inline citostring(i::CartesianIndex, version::Int, sep::Char=default_sep(version)) = (version == 3 ? "c$sep" : "" ) * join(reverse((i - oneunit(i)).I), sep) -@inline citostring(::CartesianIndex{0}, version::Int, sep::Char=default_sep(version)) = (version == 3 ? 
"c$(sep)0" : "0" ) -@inline citostring(i::CartesianIndex, ::Int, ::Type{V2ChunkKeyEncoding{S}}) where S = citostring(i, 2, S) -citostring(i::CartesianIndex, s::VersionedStore{V, S}) where {V,S} = citostring(i, V, S) - -Base.getindex(s::VersionedStore, p, i::CartesianIndex) = s[p, citostring(i,s)] -Base.delete!(s::VersionedStore, p, i::CartesianIndex) = delete!(s, p, citostring(i,s)) -Base.setindex!(s::VersionedStore, v, p, i::CartesianIndex) = s[p, citostring(i,s)]=v - -isinitialized(s::VersionedStore, p, i::CartesianIndex) = isinitialized(s,p,citostring(i, s)) - -""" -- [`storagesize(d::AbstractStore, p::AbstractString)`](@ref storagesize) -- [`subdirs(d::AbstractStore, p::AbstractString)`](@ref subdirs) -- [`subkeys(d::AbstractStore, p::AbstractString)`](@ref subkeys) -- [`isinitialized(d::AbstractStore, p::AbstractString)`](@ref isinitialized) -- [`storefromstring(::Type{<: AbstractStore}, s, _)`](@ref storefromstring) -- `Base.getindex(d::AbstractStore, i::AbstractString)`: return the data stored in key `i` as a Vector{UInt8} -- `Base.setindex!(d::AbstractStore, v, i::AbstractString)`: write the values in `v` to the key `i` of the given store `d` -""" - -storagesize(d::VersionedStore, p::AbstractString) = storagesize(parent(d), p) -subdirs(d::VersionedStore, p::AbstractString) = subdirs(parent(d), p) -subkeys(d::VersionedStore, p::AbstractString) = subkeys(parent(d), p) -isinitialized(d::VersionedStore, p::AbstractString) = isinitialized(parent(d), p) -storefromstring(::Type{VersionedStore{<: Any, <: Any, STORE}}, s, _) where STORE = VersionedStore{DV,DS}(storefromstring(STORE, s)) -storefromstring(::Type{VersionedStore{V,S}}, s, _) where {V,S} = VersionedStore{DV,DS}(storefromstring(s)) -storefromstring(::Type{VersionedStore{V,S,STORE}}, s, _) where {V,S,STORE} = VersionedStore{V,S,STORE}(storefromstring(STORE, s)) -Base.getindex(d::VersionedStore, i::AbstractString) = getindex(parent(d), i) -Base.setindex!(d::VersionedStore, v, i::AbstractString) = 
setindex!(parent(d), v, i) -Base.delete!(d::VersionedStore, i::AbstractString) = delete!(parent(d), i) - - -function Base.getproperty(store::VersionedStore{V,S}, sym::Symbol) where {V,S} - if sym == :dimension_separator - return S - elseif sym == :zarr_format - return V - elseif sym ∈ propertynames(getfield(store, :parent)) - # Support forwarding of properties to parent - return getproperty(store.parent, sym) - else - getfield(store, sym) - end -end -function Base.propertynames(store::VersionedStore) - return (:dimension_separator, :zarr_format, fieldnames(typeof(store))..., propertynames(store.parent)...) -end - - -""" - Zarr.set_dimension_separator(store::VersionedStore{V}, sep::Char)::VersionedStore{V,sep} - -Returns a VersionedStore of the same type with the same `zarr_format` parameter, `V`, -but with a dimension separator of `sep`. Note that this does not mutate the original store. - -# Examples - -``` -julia> Zarr.set_dimension_separator(Zarr.VersionedStore{2, '.'}(Zarr.DictStore(), '/')) |> typeof -Zarr.VersionedStore{2, '/',Zarr.DictStore} -``` - -""" -function set_dimension_separator(store::VersionedStore{V}, sep::Char) where V - return VersionedStore{V,sep}(store) -end -function set_dimension_separator(store::AbstractStore, sep::Char) - return VersionedStore{<: Any,sep}(store) -end - -""" - set_zarr_format(::VersionedStore{<: Any, S}, zarr_format::Int)::VersionedStore{zarr_format,S} - -Returns a VersionedStore of the same type with the same `dimension_separator` parameter, `S`, -but with the specified `zarr_format` parameter. Note that this does not mutate the original store. 
- -# Examples - -``` -julia> Zarr.set_zarr_format(Zarr.VersionedStore{2, '.'}(Zarr.DictStore(), 3)) |> typeof -Zarr.VersionedStore{3, '.', DictStore} -``` - -""" -function set_zarr_format(store::VersionedStore{<: Any, S}, zarr_format::Int) where S - return VersionedStore{zarr_format,S}(store) -end -function set_zarr_format(store::AbstractStore, zarr_format::Int) - return VersionedStore{zarr_format}(store) -end - -dimension_separator(::AbstractStore) = DS -dimension_separator(::VersionedStore{<: Any,S}) where S = S -zarr_format(::AbstractStore) = DV -zarr_format(::VersionedStore{V}) where V = V diff --git a/src/ZArray.jl b/src/ZArray.jl index 961c798f..951639f8 100644 --- a/src/ZArray.jl +++ b/src/ZArray.jl @@ -326,9 +326,9 @@ function zcreate(::Type{T}, dims::Integer...; end if path===nothing - store = VersionedStore{DV, dimension_separator}(DictStore()) + store = FormattedStore{DV, dimension_separator}(DictStore()) else - store = VersionedStore{DV, dimension_separator}(DirectoryStore(joinpath(path,name))) + store = FormattedStore{DV, dimension_separator}(DirectoryStore(joinpath(path,name))) end zcreate(T, store, dims...; kwargs...) 
end diff --git a/src/ZGroup.jl b/src/ZGroup.jl index e4d2100b..0164096f 100644 --- a/src/ZGroup.jl +++ b/src/ZGroup.jl @@ -123,15 +123,15 @@ function storefromstring(s, create=true) end end if create - return VersionedStore(DirectoryStore(s)), "" + return FormattedStore(DirectoryStore(s)), "" elseif isdir(s) # parse metadata to determine store kind temp_store = DirectoryStore(s) if is_zarray(temp_store, "") meta = getmetadata(temp_store, "", false) - store = VersionedStore{meta.zarr_format, meta.dimension_separator}(temp_store) + store = FormattedStore{meta.zarr_format, meta.dimension_separator}(temp_store) else - store = VersionedStore(temp_store) + store = FormattedStore(temp_store) end return store, "" else diff --git a/test/runtests.jl b/test/runtests.jl index ad434fe1..c01f441e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,7 +15,7 @@ CondaPkg.add("zarr"; version="2.*") @testset "fields" begin z = zzeros(Int64, 2, 3) @test z isa ZArray{Int64, 2, Zarr.BloscCompressor, - Zarr.VersionedStore{2, '.', Zarr.DictStore}} + Zarr.FormattedStore{2, '.', Zarr.DictStore}} @test :a ∈ propertynames(z.storage) @test length(z.storage.a) === 3 @@ -43,7 +43,7 @@ CondaPkg.add("zarr"; version="2.*") @testset "methods" begin z = zzeros(Int64, 2, 3) @test z isa ZArray{Int64, 2, Zarr.BloscCompressor, - Zarr.VersionedStore{2, '.', Zarr.DictStore}} + Zarr.FormattedStore{2, '.', Zarr.DictStore}} @test eltype(z) === Int64 @test ndims(z) === 2 @@ -63,7 +63,7 @@ CondaPkg.add("zarr"; version="2.*") compressor=Zarr.NoCompressor()) @test z.metadata.compressor === Zarr.NoCompressor() - @test z.storage === Zarr.VersionedStore{2 ,'.'}(Zarr.DirectoryStore("$dir/$name")) + @test z.storage === Zarr.FormattedStore{2 ,'.'}(Zarr.DirectoryStore("$dir/$name")) @test isdir("$dir/$name") @test ispath("$dir/$name/.zarray") @test ispath("$dir/$name/.zattrs") From 9b77c87b503db13573e6e927653204cb813b6ae1 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Tue, 3 Jun 2025 20:14:44 -0400 
Subject: [PATCH 16/16] Update formattedstore.jl from mkitti-v3-prototype

---
 src/Storage/formattedstore.jl | 73 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/src/Storage/formattedstore.jl b/src/Storage/formattedstore.jl
index b60ab2da..750ec6ec 100644
--- a/src/Storage/formattedstore.jl
+++ b/src/Storage/formattedstore.jl
@@ -17,6 +17,12 @@ const DS = default_sep(DV)
 # A Char is the separator for the default chunk key encoding
 abstract type ChunkKeyEncoding end
 struct V2ChunkKeyEncoding{SEP} <: ChunkKeyEncoding end
+# Dimension separator `Char` of a chunk key encoding; a bare `Char` is its own separator.
+separator(c::Char) = c
+separator(v2cke::V2ChunkKeyEncoding{SEP}) where SEP = SEP
+# Encodings are also passed as types (see the `citostring` method taking
+# `::Type{V2ChunkKeyEncoding{S}}`), so accept the type itself as well.
+separator(::Type{V2ChunkKeyEncoding{SEP}}) where SEP = SEP
 
 """
     FormattedStore{V,CKE,STORE <: AbstractStore} <: AbstractStore
@@ -172,3 +178,70 @@ dimension_separator(::AbstractStore) = DS
 dimension_separator(::FormattedStore{<: Any,S}) where S = S
 zarr_format(::AbstractStore) = DV
 zarr_format(::FormattedStore{V}) where V = V
+
+# Zarr format 3 methods: everything below reads or writes the `zarr.json` key.
+
+# Return `true` when `zarr.json` exists under `p` and its `node_type` is "group".
+# `metadata` may be supplied by a caller that has already parsed it; otherwise it
+# is loaded only after the key is known to exist, so `getmetadata` is never run
+# on a missing `zarr.json` and the answer is simply `false`.
+function is_zgroup(s::FormattedStore{3}, p, metadata=nothing)
+    isinitialized(s, _concatpath(p, "zarr.json")) || return false
+    md = metadata === nothing ? getmetadata(s, p, false) : metadata
+    return md.node_type == "group"
+end
+
+# Same contract as `is_zgroup`, but tests for `node_type == "array"`.
+function is_zarray(s::FormattedStore{3}, p, metadata=nothing)
+    isinitialized(s, _concatpath(p, "zarr.json")) || return false
+    md = metadata === nothing ? getmetadata(s, p, false) : metadata
+    return md.node_type == "array"
+end
+
+# Build a `Metadata` from the text of the `zarr.json` stored under `p`.
+getmetadata(s::FormattedStore{3}, p, fill_as_missing) =
+    Metadata(String(maybecopy(s[p, "zarr.json"])), fill_as_missing)
+
+# Print `m` as JSON (indent of 4 when `indent_json` is true), store the bytes as
+# `zarr.json` under `p`, and return `m`.
+function writemetadata(s::FormattedStore{3}, p, m::Metadata; indent_json::Bool=false)
+    met = IOBuffer()
+    if indent_json
+        JSON.print(met, m, 4)
+    else
+        JSON.print(met, m)
+    end
+    s[p, "zarr.json"] = take!(met)
+    return m
+end
+
+# Return the "attributes" entry of the `zarr.json` stored under `p`, or an empty
+# `Dict{String, Any}` when there is none. Errors when `zarr.json` is missing.
+# `p` is the node path, exactly as in `getmetadata` and `writeattrs`.
+function getattrs(s::FormattedStore{3}, p)
+    md = s[p, "zarr.json"]
+    md === nothing && error("zarr.json not found")
+    # Quote bare NaN before parsing (only the `: NaN,` spelling is handled);
+    # presumably the JSON parser rejects unquoted NaN -- TODO confirm.
+    md = JSON.parse(replace(String(maybecopy(md)), ": NaN," => ": \"NaN\","))
+    return get(md, "attributes", Dict{String, Any}())
+end
+
+# Replace the "attributes" entry of the `zarr.json` stored under `p` with `att`,
+# write the document back, and return `att`. Errors when `zarr.json` is missing.
+function writeattrs(s::FormattedStore{3}, p, att::Dict; indent_json::Bool=false)
+    # This is messy, we need to open zarr.json and replace the attributes section
+    md = s[p, "zarr.json"]
+    md === nothing && error("zarr.json not found")
+    md = Dict(JSON.parse(replace(String(maybecopy(md)), ": NaN," => ": \"NaN\",")))
+    md["attributes"] = att
+
+    b = IOBuffer()
+    if indent_json
+        JSON.print(b, md, 4)
+    else
+        JSON.print(b, md)
+    end
+    s[p, "zarr.json"] = take!(b)
+    return att
+end