diff --git a/poetry.lock b/poetry.lock index 84dfc9e..5e357ae 100644 --- a/poetry.lock +++ b/poetry.lock @@ -290,6 +290,24 @@ docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2. testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] typing = ["typing-extensions (>=4.12.2)"] +[[package]] +name = "h5netcdf" +version = "1.3.0" +description = "netCDF4 via h5py" +optional = false +python-versions = ">=3.9" +files = [ + {file = "h5netcdf-1.3.0-py3-none-any.whl", hash = "sha256:f2df69dcd3665dc9c4d43eb6529dedd113b2508090d12ac973573305a8406465"}, + {file = "h5netcdf-1.3.0.tar.gz", hash = "sha256:a171c027daeb34b24c24a3b6304195b8eabbb6f10c748256ed3cfe19806383cf"}, +] + +[package.dependencies] +h5py = "*" +packaging = "*" + +[package.extras] +test = ["netCDF4", "pytest"] + [[package]] name = "h5py" version = "3.11.0" @@ -1530,4 +1548,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "0023555becb3abbfba198d28b8463cf51dc279c98c6d01bf8ac625a909e14eb6" +content-hash = "47cf503b7a309114705094f221fbd3e991ba6d4e593880bb3542bbdf9c3f5724" diff --git a/pyproject.toml b/pyproject.toml index 36124fc..60d09f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ numpy = ">=1.24.3" h5py = ">=3.9.0" zarr = ">=2.16.1" xarray = "^2024.1.1" +h5netcdf = ">=1.3.0" ruamel-yaml = "^0.18.6" importlib_metadata = "*" diff --git a/src/linkml_arrays/dumpers/__init__.py b/src/linkml_arrays/dumpers/__init__.py index 5011043..0402927 100644 --- a/src/linkml_arrays/dumpers/__init__.py +++ b/src/linkml_arrays/dumpers/__init__.py @@ -1,15 +1,21 @@ """Dumper classes for linkml-arrays.""" from .hdf5_dumper import Hdf5Dumper +from .xarray_dumpers import XarrayNetCDFDumper, XarrayZarrDumper from .yaml_dumper import YamlDumper from .yaml_hdf5_dumper import 
YamlHdf5Dumper from .yaml_numpy_dumper import YamlNumpyDumper +from .yaml_xarray_dumpers import YamlXarrayNetCDFDumper, YamlXarrayZarrDumper from .zarr_directory_store_dumper import ZarrDirectoryStoreDumper __all__ = [ "Hdf5Dumper", + "XarrayNetCDFDumper", + "XarrayZarrDumper", "YamlDumper", "YamlHdf5Dumper", "YamlNumpyDumper", "ZarrDirectoryStoreDumper", + "YamlXarrayNetCDFDumper", + "YamlXarrayZarrDumper", ] diff --git a/src/linkml_arrays/dumpers/hdf5_dumper.py b/src/linkml_arrays/dumpers/hdf5_dumper.py index eb50a03..538badb 100644 --- a/src/linkml_arrays/dumpers/hdf5_dumper.py +++ b/src/linkml_arrays/dumpers/hdf5_dumper.py @@ -39,14 +39,16 @@ def _iterate_element( class Hdf5Dumper(Dumper): """Dumper class for LinkML models to HDF5 files.""" - # TODO is this the right method to overwrite? it does not dump a string - def dumps( + def dump( self, element: Union[YAMLRoot, BaseModel], + to_file: str, schemaview: SchemaView, - output_file_path: Union[str, Path], **kwargs, ): """Dump the element to an HDF5 file.""" - with h5py.File(output_file_path, "w") as f: + with h5py.File(to_file, "w") as f: _iterate_element(element, schemaview, f) + + def dumps(self, element: Union[YAMLRoot, BaseModel], **kwargs): + raise NotImplementedError("This method is not sensible for this dumper.") diff --git a/src/linkml_arrays/dumpers/xarray_dumpers.py b/src/linkml_arrays/dumpers/xarray_dumpers.py new file mode 100644 index 0000000..bddb206 --- /dev/null +++ b/src/linkml_arrays/dumpers/xarray_dumpers.py @@ -0,0 +1,132 @@ +"""Class for dumping a LinkML model to netcdf like using xarray and DataTree.""" + +from pathlib import Path + +import numpy as np +from xarray.core.datatree import DataTree + +"""Class for dumping a LinkML model to an netcdf like file.""" + +from pathlib import Path +from typing import Union + +import xarray as xr +import pandas as pd +from linkml_runtime import SchemaView +from linkml_runtime.dumpers.dumper_root import Dumper +from linkml_runtime.utils.yamlutils 
import YAMLRoot +from pydantic import BaseModel +from linkml_runtime import SchemaView + + +def _create_node(model, schemaview): + """Create datatree from temperature dataset""" + node_dict = {} + for k, v in vars(model).items(): + if isinstance(v, str): + # parts of the dataset with key and value both being string, e.g. name, latitude_in_deg + try: + node_dict["attrs"][k] = v + except KeyError: + node_dict["attrs"] = {k: v} + elif isinstance(v, BaseModel): + if len(var_dict := vars(v)) == 1: + # If values are length 1 we are dealing with coords like date + v = v.values + try: + node_dict["coords"][k] = v + except KeyError: + node_dict["coords"] = {k: {"data": v, "dims": k}} + else: + for key, value in var_dict.items(): + if key == "values": + if not isinstance(value[0], list): + try: + node_dict["coords"][k] = {"data": value, "dims": list(node_dict["coords"])[0]} + except KeyError: + node_dict["coords"] = {k: {"data": value, "dims": list(node_dict["coords"])[0]}} + else: + # Parse the temperature matrix + element_type = type(v).__name__ + dimensions_expressions = schemaview.induced_slot(key, element_type).array.dimensions + dims = [dim.alias for dim in dimensions_expressions] + array = np.array(value) + node_dict["dims"] = {dim: array.shape[i] for i, dim in enumerate(dims)} + node_dict["data_vars"][k].update({"data": array, "dims": list(node_dict["dims"].keys())}) + else: + if isinstance(value, str): + # can't use timestamp here as it does not serialize, potentially add with 'data' dims as coord + node_dict["coords"][k].update({"attrs": {key: value}}) + else: + # conversion factor + node_dict["data_vars"] = {k: {"attrs": {key: value}}} + return xr.Dataset.from_dict(node_dict) + + + +def _iterate_element( + element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, datatree = None +): + """Recursively iterate through the elements of a LinkML model and save them. 
+ + Write toplevel Pydantic BaseModel objects as datasets, slots with the "array" element + as datasets, and other slots as attributes. + """ + # get the type of the element + element_type = type(element).__name__ + + for k, v in vars(element).items(): + if isinstance(v, BaseModel): + # create a subgroup and recurse + if "values" in v.__dir__(): + dims = ["y","x"] + data_dict = vars(v) + data_dict["data"] = np.array(data_dict.pop("values")) + data_dict["dims"] = [dims[i] for i in range(data_dict["data"].shape[0])] + data_dict["attrs"] = {"name": v.name} + dataarray = xr.DataArray.from_dict(d=data_dict) + datatree[k] = dataarray + else: + dataset = _create_node(v, schemaview) + datatree[k] = DataTree(dataset) + elif isinstance(v, str): + datatree.attrs["name"] = v + return datatree + + +class XarrayNetCDFDumper(Dumper): + """Dumper class for LinkML models to HDF5 files.""" + + def dump( + self, + element: Union[YAMLRoot, BaseModel], + to_file: str, + schemaview: SchemaView, + **kwargs, + ): + """Dump the element to an HDF5 file.""" + datatree = DataTree() + datatree = _iterate_element(element, schemaview, datatree) + datatree.to_netcdf(to_file, engine='h5netcdf') + + def dumps(self, element: Union[YAMLRoot, BaseModel], **kwargs): + raise NotImplementedError("This method is not sensible for this dumper.") + + +class XarrayZarrDumper(Dumper): + """Dumper class for LinkML models to HDF5 files.""" + + def dump( + self, + element: Union[YAMLRoot, BaseModel], + to_file: str, + schemaview: SchemaView, + **kwargs, + ): + """Dump the element to an HDF5 file.""" + datatree = DataTree() + datatree = _iterate_element(element, schemaview, datatree) + datatree.to_zarr(to_file) + + def dumps(self, element: Union[YAMLRoot, BaseModel], **kwargs): + raise NotImplementedError("This method is not sensible for this dumper.") \ No newline at end of file diff --git a/src/linkml_arrays/dumpers/yaml_array_file_dumper.py b/src/linkml_arrays/dumpers/yaml_array_file_dumper.py index 
18f9807..0b39199 100644 --- a/src/linkml_arrays/dumpers/yaml_array_file_dumper.py +++ b/src/linkml_arrays/dumpers/yaml_array_file_dumper.py @@ -61,19 +61,17 @@ def _iterate_element( else: output_file_name = f"{found_slot.name}" - # if output_dir is absolute, make it relative to current working directory - # and create the directory if it does not exist - if output_dir.is_absolute(): - output_dir = Path(os.path.relpath(output_dir, start=os.getcwd())) - output_dir.mkdir(exist_ok=True) - output_file_path_no_suffix = output_dir / output_file_name + output_file_path_no_suffix = (output_dir / output_file_name) # save the numpy array to file and write the file path to the dictionary output_file_path = write_array(v, output_file_path_no_suffix) + + # write the path to the array file relative to the output directory where the yaml is written + relative_output_file_path = os.path.relpath(output_file_path, start=output_dir) ret_dict[k] = { "source": [ { - "file": f"./{output_file_path}", + "file": f"{relative_output_file_path}", "format": format, } ] @@ -100,6 +98,21 @@ class YamlArrayFileDumper(Dumper, metaclass=ABCMeta): # FORMAT is a class attribute that must be set by subclasses + def dump( + self, + element: Union[YAMLRoot, BaseModel], + to_file: str, + schemaview: SchemaView, + **kwargs, + ): + """Dump the element to a YAML file with paths to array files.""" + output_dir = Path(to_file).parent + input = _iterate_element( + element, schemaview, Path(output_dir), self.write_array, self.FORMAT + ) + with open(to_file, "w") as f: + yaml.dump(input, f) + def dumps( self, element: Union[YAMLRoot, BaseModel], diff --git a/src/linkml_arrays/dumpers/yaml_xarray_dumpers.py b/src/linkml_arrays/dumpers/yaml_xarray_dumpers.py new file mode 100644 index 0000000..5c81651 --- /dev/null +++ b/src/linkml_arrays/dumpers/yaml_xarray_dumpers.py @@ -0,0 +1,63 @@ +"""Class for dumping a LinkML model to YAML with paths to NumPy files.""" + +from pathlib import Path +from typing import List, 
Union + +import numpy as np +import xarray as xr + +from .yaml_array_file_dumper import YamlArrayFileDumper + + +class YamlXarrayNetCDFDumper(YamlArrayFileDumper): + """Dumper class for LinkML models to YAML with paths to .nc file. + + Each array is written to a netcdf dataset at path "/data" in a new .nc file. + """ + + FILE_SUFFIX = ".nc" # used in parent class + FORMAT = "netcdf" + + @classmethod + def write_array( + cls, array: Union[List, np.ndarray], output_file_path_no_suffix: Union[str, Path] + ): + """Write an array to a NumPy file.""" + # TODO do not assume that there is only one by this name + # add suffix to the file name + if isinstance(output_file_path_no_suffix, str): + output_file_path_no_suffix = Path(output_file_path_no_suffix) + output_file_path = output_file_path_no_suffix.parent / ( + output_file_path_no_suffix.name + cls.FILE_SUFFIX + ) + + data_array = xr.DataArray(data=array) + data_array.to_netcdf(output_file_path, engine="h5netcdf") + return output_file_path + + +class YamlXarrayZarrDumper(YamlArrayFileDumper): + """Dumper class for LinkML models to YAML with paths to .zarr file. + + Each array is written to a zarr dataset at path "/data" in a new .zarr file. 
+ """ + + FILE_SUFFIX = ".zarr" # used in parent class + FORMAT = "zarr" + + @classmethod + def write_array( + cls, array: Union[List, np.ndarray], output_file_path_no_suffix: Union[str, Path] + ): + """Write an array to a NumPy file.""" + # TODO do not assume that there is only one by this name + # add suffix to the file name + if isinstance(output_file_path_no_suffix, str): + output_file_path_no_suffix = Path(output_file_path_no_suffix) + output_file_path = output_file_path_no_suffix.parent / ( + output_file_path_no_suffix.name + cls.FILE_SUFFIX + ) + + data_array = xr.DataArray(data=array) + data_array.to_zarr(output_file_path) + return output_file_path \ No newline at end of file diff --git a/src/linkml_arrays/dumpers/zarr_directory_store_dumper.py b/src/linkml_arrays/dumpers/zarr_directory_store_dumper.py index ca4e1d2..720e5f5 100644 --- a/src/linkml_arrays/dumpers/zarr_directory_store_dumper.py +++ b/src/linkml_arrays/dumpers/zarr_directory_store_dumper.py @@ -39,15 +39,17 @@ def _iterate_element( class ZarrDirectoryStoreDumper(Dumper): """Dumper class for LinkML models to Zarr directory stores.""" - # TODO is this the right method to overwrite? 
it does not dump a string - def dumps( + def dump( self, element: Union[YAMLRoot, BaseModel], + to_file: str, schemaview: SchemaView, - output_file_path: Union[str, Path], **kwargs, ): """Dump the element to a Zarr directory store.""" - store = zarr.DirectoryStore(output_file_path) + store = zarr.DirectoryStore(to_file) root = zarr.group(store=store, overwrite=True) _iterate_element(element, schemaview, root) + + def dumps(self, element: Union[YAMLRoot, BaseModel], **kwargs): + raise NotImplementedError("This method is not sensible for this dumper.") diff --git a/src/linkml_arrays/loaders/__init__.py b/src/linkml_arrays/loaders/__init__.py index af71135..d86a0c5 100644 --- a/src/linkml_arrays/loaders/__init__.py +++ b/src/linkml_arrays/loaders/__init__.py @@ -3,10 +3,13 @@ from .hdf5_loader import Hdf5Loader from .yaml_array_file_loader import YamlArrayFileLoader from .yaml_loader import YamlLoader +from .xarray_loaders import XarrayZarrLoader, XarrayNetCDFLoader from .zarr_directory_store_loader import ZarrDirectoryStoreLoader __all__ = [ "Hdf5Loader", + "XarrayNetCDFLoader", + "XarrayZarrLoader", "YamlArrayFileLoader", "YamlLoader", "ZarrDirectoryStoreLoader", diff --git a/src/linkml_arrays/loaders/xarray_loaders.py b/src/linkml_arrays/loaders/xarray_loaders.py new file mode 100644 index 0000000..f9a85a4 --- /dev/null +++ b/src/linkml_arrays/loaders/xarray_loaders.py @@ -0,0 +1,107 @@ +"""Class for loading a LinkML model from a Zarr directory store.""" + +from typing import Type, Union + +from pathlib import Path +from xarray.backends.api import open_datatree +import xarray as xr +from xarray.core.datatree import DataTree +from linkml_runtime import SchemaView +from linkml_runtime.linkml_model import ClassDefinition +from linkml_runtime.loaders.loader_root import Loader +from linkml_runtime.utils.yamlutils import YAMLRoot +from pydantic import BaseModel + + +def _iterate_element( + group: DataTree, element_type: ClassDefinition, schemaview: SchemaView +) -> dict: 
+ """Recursively iterate through the elements of a LinkML model and load them into a dict. + + Datasets are read into memory. + """ + ret_dict = dict() + for k, v in group.attrs.items(): + ret_dict[k] = v + + for k, v in group.items(): + found_slot = schemaview.induced_slot( + k, element_type.name + ) # assumes the slot name has been written as the name which is OK for now. + if isinstance(v, xr.DataArray): + if not v.coords: + value_dict = {key: v.attrs[key] for key in v.attrs} + value_dict.update({"values": v.values}) # read all the values into memory # TODO support lazy loading + else: + value_dict = {key: v.attrs[key] for key in v.attrs} + value_dict.update({"values": v.values}) # read all the values into memory # TODO support lazy loading + + + for coord in v.coords: + coordinate_array_dict = {key: value for key, value in v.coords[coord].attrs.items()} + coordinate_array_dict.update({"values": v.coords[coord].values}) + ret_dict[coord] = coordinate_array_dict + + ret_dict[k] = value_dict + + elif isinstance(v, DataTree): # it's a subgroup + found_slot_range = schemaview.get_class(found_slot.range) + v = _iterate_element(v, found_slot_range, schemaview) + ret_dict[k] = v + + return ret_dict + + +class XarrayZarrLoader(Loader): + """Class for loading a LinkML model from a xarray Zarr directory store.""" + + def load_any(self, source: str, **kwargs): + """Create an instance of the target class from a Zarr directory store.""" + return self.load(source, **kwargs) + + def loads(self, source: str, **kwargs): + """Create an instance of the target class from a Zarr directory store.""" + return self.load(source, **kwargs) + + def load( + self, + source: str, + target_class: Type[Union[YAMLRoot, BaseModel]], + schemaview: SchemaView, + **kwargs, + ): + """Create an instance of the target class from a Zarr directory store.""" + element_type = schemaview.get_class(target_class.__name__) + z = open_datatree(Path(source), engine="zarr") + element = _iterate_element(z, 
element_type, schemaview) + obj = target_class(**element) + + return obj + + +class XarrayNetCDFLoader(Loader): + """Class for loading a LinkML model from a xarray netcdf store.""" + + def load_any(self, source: str, **kwargs): + """Create an instance of the target class from a netcdf store.""" + return self.load(source, **kwargs) + + def loads(self, source: str, **kwargs): + """Create an instance of the target class from a netcdf store.""" + return self.load(source, **kwargs) + + def load( + self, + source: str, + target_class: Type[Union[YAMLRoot, BaseModel]], + schemaview: SchemaView, + **kwargs, + ): + """Create an instance of the target class from a netcdf store.""" + element_type = schemaview.get_class(target_class.__name__) + # opening with this engine gives problems with permissions at least on windows. + z = open_datatree(Path(source), engine='h5netcdf') + element = _iterate_element(z, element_type, schemaview) + obj = target_class(**element) + + return obj diff --git a/src/linkml_arrays/loaders/yaml_array_file_loader.py b/src/linkml_arrays/loaders/yaml_array_file_loader.py index d40b61d..26c9d94 100644 --- a/src/linkml_arrays/loaders/yaml_array_file_loader.py +++ b/src/linkml_arrays/loaders/yaml_array_file_loader.py @@ -10,6 +10,21 @@ from linkml_runtime.loaders.loader_root import Loader from linkml_runtime.utils.yamlutils import YAMLRoot from pydantic import BaseModel +from pathlib import Path +import xarray as xr + +def _parse_xarray_dataset(source, k, format): + file_path = source.get("file", None) + if file_path is None: + raise ValueError( + f"Array slot {k}, source {source}, format {format} has no file." 
+ ) + if format == "zarr": + data_set = xr.open_zarr(file_path) + else: + data_set = xr.open_dataset(file_path, engine='h5netcdf') + array_key = list(data_set.data_vars.keys())[0] + return data_set[array_key].values def _iterate_element( @@ -39,7 +54,7 @@ def _iterate_element( raise ValueError( f"Array slot {k}, source {source}, format {format} has no file." ) - array_file_path = file + array_file_path = Path(file) with h5py.File(array_file_path, "r") as f: # read all the values into memory TODO: support lazy loading v = f["data"][()] @@ -52,6 +67,9 @@ def _iterate_element( array_file_path = file # read all the values into memory TODO: support lazy loading v = np.load(array_file_path) + elif format in ["zarr", "netcdf"]: + v = _parse_xarray_dataset(source, k, format) + elif isinstance(v, dict): found_slot_range = schemaview.get_class(found_slot.range) v = _iterate_element(v, found_slot_range, schemaview) diff --git a/tests/input/container_yaml.yaml b/tests/test_dumpers/ground_truth/container_yaml.yaml similarity index 100% rename from tests/input/container_yaml.yaml rename to tests/test_dumpers/ground_truth/container_yaml.yaml diff --git a/tests/input/container_yaml_hdf5.yaml b/tests/test_dumpers/ground_truth/container_yaml_hdf5.yaml similarity index 67% rename from tests/input/container_yaml_hdf5.yaml rename to tests/test_dumpers/ground_truth/container_yaml_hdf5.yaml index b2e8d1a..a52de35 100644 --- a/tests/input/container_yaml_hdf5.yaml +++ b/tests/test_dumpers/ground_truth/container_yaml_hdf5.yaml @@ -3,25 +3,25 @@ latitude_series: name: my_latitude values: source: - - file: ./out/my_latitude.values.h5 + - file: my_latitude.values.h5 format: hdf5 longitude_series: name: my_longitude values: source: - - file: ./out/my_longitude.values.h5 + - file: my_longitude.values.h5 format: hdf5 temperature_dataset: date: values: source: - - file: ./out/my_temperature.date.values.h5 + - file: my_temperature.date.values.h5 format: hdf5 day_in_d: reference_date: '2020-01-01' 
values: source: - - file: ./out/my_temperature.day_in_d.values.h5 + - file: my_temperature.day_in_d.values.h5 format: hdf5 latitude_in_deg: my_latitude longitude_in_deg: my_longitude @@ -30,5 +30,5 @@ temperature_dataset: conversion_factor: 1000.0 values: source: - - file: ./out/my_temperature.temperatures_in_K.values.h5 + - file: my_temperature.temperatures_in_K.values.h5 format: hdf5 diff --git a/tests/input/container_yaml_numpy.yaml b/tests/test_dumpers/ground_truth/container_yaml_numpy.yaml similarity index 66% rename from tests/input/container_yaml_numpy.yaml rename to tests/test_dumpers/ground_truth/container_yaml_numpy.yaml index 1e8d765..d2d7bd1 100644 --- a/tests/input/container_yaml_numpy.yaml +++ b/tests/test_dumpers/ground_truth/container_yaml_numpy.yaml @@ -3,25 +3,25 @@ latitude_series: name: my_latitude values: source: - - file: "./out/my_latitude.values.npy" + - file: my_latitude.values.npy format: numpy longitude_series: name: my_longitude values: source: - - file: "./out/my_longitude.values.npy" + - file: my_longitude.values.npy format: numpy temperature_dataset: date: values: source: - - file: "./out/my_temperature.date.values.npy" + - file: my_temperature.date.values.npy format: numpy day_in_d: reference_date: '2020-01-01' values: source: - - file: "./out/my_temperature.day_in_d.values.npy" + - file: my_temperature.day_in_d.values.npy format: numpy latitude_in_deg: my_latitude longitude_in_deg: my_longitude @@ -30,5 +30,5 @@ temperature_dataset: conversion_factor: 1000.0 values: source: - - file: "./out/my_temperature.temperatures_in_K.values.npy" + - file: my_temperature.temperatures_in_K.values.npy format: numpy diff --git a/tests/test_dumpers/ground_truth/container_yaml_xarray_netcdf.yaml b/tests/test_dumpers/ground_truth/container_yaml_xarray_netcdf.yaml new file mode 100644 index 0000000..da875ec --- /dev/null +++ b/tests/test_dumpers/ground_truth/container_yaml_xarray_netcdf.yaml @@ -0,0 +1,34 @@ +name: my_container +latitude_series: + 
name: my_latitude + values: + source: + - file: my_latitude.values.nc + format: netcdf +longitude_series: + name: my_longitude + values: + source: + - file: my_longitude.values.nc + format: netcdf +temperature_dataset: + date: + values: + source: + - file: my_temperature.date.values.nc + format: netcdf + day_in_d: + reference_date: '2020-01-01' + values: + source: + - file: my_temperature.day_in_d.values.nc + format: netcdf + latitude_in_deg: my_latitude + longitude_in_deg: my_longitude + name: my_temperature + temperatures_in_K: + conversion_factor: 1000.0 + values: + source: + - file: my_temperature.temperatures_in_K.values.nc + format: netcdf diff --git a/tests/test_dumpers/ground_truth/container_yaml_xarray_zarr.yaml b/tests/test_dumpers/ground_truth/container_yaml_xarray_zarr.yaml new file mode 100644 index 0000000..de6a1da --- /dev/null +++ b/tests/test_dumpers/ground_truth/container_yaml_xarray_zarr.yaml @@ -0,0 +1,34 @@ +name: my_container +latitude_series: + name: my_latitude + values: + source: + - file: my_latitude.values.zarr + format: zarr +longitude_series: + name: my_longitude + values: + source: + - file: my_longitude.values.zarr + format: zarr +temperature_dataset: + date: + values: + source: + - file: my_temperature.date.values.zarr + format: zarr + day_in_d: + reference_date: '2020-01-01' + values: + source: + - file: my_temperature.day_in_d.values.zarr + format: zarr + latitude_in_deg: my_latitude + longitude_in_deg: my_longitude + name: my_temperature + temperatures_in_K: + conversion_factor: 1000.0 + values: + source: + - file: my_temperature.temperatures_in_K.values.zarr + format: zarr diff --git a/tests/input/my_container.h5 b/tests/test_dumpers/ground_truth/my_container.h5 similarity index 100% rename from tests/input/my_container.h5 rename to tests/test_dumpers/ground_truth/my_container.h5 diff --git a/tests/test_dumpers/ground_truth/my_container.nc b/tests/test_dumpers/ground_truth/my_container.nc new file mode 100644 index 
0000000..100f811 Binary files /dev/null and b/tests/test_dumpers/ground_truth/my_container.nc differ diff --git a/tests/input/my_container.zarr/.zattrs b/tests/test_dumpers/ground_truth/my_container.zarr/.zattrs similarity index 100% rename from tests/input/my_container.zarr/.zattrs rename to tests/test_dumpers/ground_truth/my_container.zarr/.zattrs diff --git a/tests/input/my_container.zarr/.zgroup b/tests/test_dumpers/ground_truth/my_container.zarr/.zgroup similarity index 100% rename from tests/input/my_container.zarr/.zgroup rename to tests/test_dumpers/ground_truth/my_container.zarr/.zgroup diff --git a/tests/input/my_container.zarr/latitude_series/.zattrs b/tests/test_dumpers/ground_truth/my_container.zarr/latitude_series/.zattrs similarity index 100% rename from tests/input/my_container.zarr/latitude_series/.zattrs rename to tests/test_dumpers/ground_truth/my_container.zarr/latitude_series/.zattrs diff --git a/tests/input/my_container.zarr/latitude_series/.zgroup b/tests/test_dumpers/ground_truth/my_container.zarr/latitude_series/.zgroup similarity index 100% rename from tests/input/my_container.zarr/latitude_series/.zgroup rename to tests/test_dumpers/ground_truth/my_container.zarr/latitude_series/.zgroup diff --git a/tests/input/my_container.zarr/latitude_series/values/.zarray b/tests/test_dumpers/ground_truth/my_container.zarr/latitude_series/values/.zarray similarity index 100% rename from tests/input/my_container.zarr/latitude_series/values/.zarray rename to tests/test_dumpers/ground_truth/my_container.zarr/latitude_series/values/.zarray diff --git a/tests/input/my_container.zarr/latitude_series/values/0.0 b/tests/test_dumpers/ground_truth/my_container.zarr/latitude_series/values/0.0 similarity index 100% rename from tests/input/my_container.zarr/latitude_series/values/0.0 rename to tests/test_dumpers/ground_truth/my_container.zarr/latitude_series/values/0.0 diff --git a/tests/input/my_container.zarr/longitude_series/.zattrs 
b/tests/test_dumpers/ground_truth/my_container.zarr/longitude_series/.zattrs similarity index 100% rename from tests/input/my_container.zarr/longitude_series/.zattrs rename to tests/test_dumpers/ground_truth/my_container.zarr/longitude_series/.zattrs diff --git a/tests/input/my_container.zarr/longitude_series/.zgroup b/tests/test_dumpers/ground_truth/my_container.zarr/longitude_series/.zgroup similarity index 100% rename from tests/input/my_container.zarr/longitude_series/.zgroup rename to tests/test_dumpers/ground_truth/my_container.zarr/longitude_series/.zgroup diff --git a/tests/input/my_container.zarr/longitude_series/values/.zarray b/tests/test_dumpers/ground_truth/my_container.zarr/longitude_series/values/.zarray similarity index 100% rename from tests/input/my_container.zarr/longitude_series/values/.zarray rename to tests/test_dumpers/ground_truth/my_container.zarr/longitude_series/values/.zarray diff --git a/tests/input/my_container.zarr/longitude_series/values/0.0 b/tests/test_dumpers/ground_truth/my_container.zarr/longitude_series/values/0.0 similarity index 100% rename from tests/input/my_container.zarr/longitude_series/values/0.0 rename to tests/test_dumpers/ground_truth/my_container.zarr/longitude_series/values/0.0 diff --git a/tests/input/my_container.zarr/temperature_dataset/.zattrs b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/.zattrs similarity index 100% rename from tests/input/my_container.zarr/temperature_dataset/.zattrs rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/.zattrs diff --git a/tests/input/my_container.zarr/temperature_dataset/.zgroup b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/.zgroup similarity index 100% rename from tests/input/my_container.zarr/temperature_dataset/.zgroup rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/.zgroup diff --git a/tests/input/my_container.zarr/temperature_dataset/date/.zgroup 
b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/date/.zgroup similarity index 100% rename from tests/input/my_container.zarr/temperature_dataset/date/.zgroup rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/date/.zgroup diff --git a/tests/input/my_container.zarr/temperature_dataset/date/values/.zarray b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/date/values/.zarray similarity index 100% rename from tests/input/my_container.zarr/temperature_dataset/date/values/.zarray rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/date/values/.zarray diff --git a/tests/input/my_container.zarr/temperature_dataset/date/values/0 b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/date/values/0 similarity index 100% rename from tests/input/my_container.zarr/temperature_dataset/date/values/0 rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/date/values/0 diff --git a/tests/input/my_container.zarr/temperature_dataset/day_in_d/.zattrs b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/day_in_d/.zattrs similarity index 100% rename from tests/input/my_container.zarr/temperature_dataset/day_in_d/.zattrs rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/day_in_d/.zattrs diff --git a/tests/input/my_container.zarr/temperature_dataset/day_in_d/.zgroup b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/day_in_d/.zgroup similarity index 100% rename from tests/input/my_container.zarr/temperature_dataset/day_in_d/.zgroup rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/day_in_d/.zgroup diff --git a/tests/input/my_container.zarr/temperature_dataset/day_in_d/values/.zarray b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/day_in_d/values/.zarray similarity index 100% rename from 
tests/input/my_container.zarr/temperature_dataset/day_in_d/values/.zarray rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/day_in_d/values/.zarray diff --git a/tests/input/my_container.zarr/temperature_dataset/day_in_d/values/0 b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/day_in_d/values/0 similarity index 100% rename from tests/input/my_container.zarr/temperature_dataset/day_in_d/values/0 rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/day_in_d/values/0 diff --git a/tests/input/my_container.zarr/temperature_dataset/temperatures_in_K/.zattrs b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/temperatures_in_K/.zattrs similarity index 100% rename from tests/input/my_container.zarr/temperature_dataset/temperatures_in_K/.zattrs rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/temperatures_in_K/.zattrs diff --git a/tests/input/my_container.zarr/temperature_dataset/temperatures_in_K/.zgroup b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/temperatures_in_K/.zgroup similarity index 100% rename from tests/input/my_container.zarr/temperature_dataset/temperatures_in_K/.zgroup rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/temperatures_in_K/.zgroup diff --git a/tests/input/my_container.zarr/temperature_dataset/temperatures_in_K/values/.zarray b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/temperatures_in_K/values/.zarray similarity index 100% rename from tests/input/my_container.zarr/temperature_dataset/temperatures_in_K/values/.zarray rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/temperatures_in_K/values/.zarray diff --git a/tests/input/my_container.zarr/temperature_dataset/temperatures_in_K/values/0.0.0 b/tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/temperatures_in_K/values/0.0.0 similarity index 
100% rename from tests/input/my_container.zarr/temperature_dataset/temperatures_in_K/values/0.0.0 rename to tests/test_dumpers/ground_truth/my_container.zarr/temperature_dataset/temperatures_in_K/values/0.0.0 diff --git a/tests/test_dumpers/ground_truth/my_container_xarray.zarr/.zattrs b/tests/test_dumpers/ground_truth/my_container_xarray.zarr/.zattrs new file mode 100644 index 0000000..060eade --- /dev/null +++ b/tests/test_dumpers/ground_truth/my_container_xarray.zarr/.zattrs @@ -0,0 +1,3 @@ +{ + "name": "my_container" +} \ No newline at end of file diff --git a/tests/test_dumpers/ground_truth/my_container_xarray.zarr/.zgroup b/tests/test_dumpers/ground_truth/my_container_xarray.zarr/.zgroup new file mode 100644 index 0000000..3b7daf2 --- /dev/null +++ b/tests/test_dumpers/ground_truth/my_container_xarray.zarr/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/tests/test_dumpers/ground_truth/my_container_xarray.zarr/.zmetadata b/tests/test_dumpers/ground_truth/my_container_xarray.zarr/.zmetadata new file mode 100644 index 0000000..c0c3165 --- /dev/null +++ b/tests/test_dumpers/ground_truth/my_container_xarray.zarr/.zmetadata @@ -0,0 +1,161 @@ +{ + "metadata": { + ".zattrs": { + "name": "my_container" + }, + ".zgroup": { + "zarr_format": 2 + }, + "latitude_series/.zarray": { + "chunks": [ + 2, + 2 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " Container: @@ -59,59 +62,164 @@ def _create_container() -> Container: def test_yaml_dumper(): """Test YamlDumper dumping to a YAML file.""" + # NOTE: YamlDumper dumps to a YAML string/stream, not a file container = _create_container() - schemaview = SchemaView(INPUT_DIR / "temperature_schema.yaml") + schemaview = SchemaView(GROUND_TRUTH_DIR / "temperature_schema.yaml") ret = YamlDumper().dumps(container, schemaview=schemaview) # read and compare with the expected YAML file ignoring order of keys - 
expected_yaml_file = INPUT_DIR / "container_yaml.yaml" + expected_yaml_file = GROUND_TRUTH_DIR / "container_yaml.yaml" yaml = YAML(typ="safe") with open(expected_yaml_file) as f: expected = yaml.load(f) # load yaml into dictionary actual = yaml.load(ret) - assert actual == expected + assert expected.keys() == actual.keys() + for key in expected.keys(): + assert actual[key] == expected[key] -def test_yaml_numpy_dumper(): +def test_yaml_numpy_dumper(tmp_path): """Test YamlNumpyDumper dumping to a YAML file and NumPy .npy files in a directory.""" container = _create_container() - schemaview = SchemaView(INPUT_DIR / "temperature_schema.yaml") - ret = YamlNumpyDumper().dumps(container, schemaview=schemaview, output_dir="./out") + output_yaml = tmp_path / "container_yaml_numpy.yaml" + schemaview = SchemaView(GROUND_TRUTH_DIR / "temperature_schema.yaml") + YamlNumpyDumper().dump(container, to_file=output_yaml, schemaview=schemaview) # read and compare with the expected YAML file ignoring order of keys - expected_yaml_file = INPUT_DIR / "container_yaml_numpy.yaml" + expected_yaml_file = GROUND_TRUTH_DIR / "container_yaml_numpy.yaml" yaml = YAML(typ="safe") - with open(expected_yaml_file) as f: - expected = yaml.load(f) # load yaml into dictionary - actual = yaml.load(ret) - assert actual == expected + with open(output_yaml) as f_actual: + with open(expected_yaml_file) as f_expected: + actual = yaml.load(f_actual) + expected = yaml.load(f_expected) + assert actual == expected -def test_yaml_hdf5_dumper(): +def test_yaml_hdf5_dumper(tmp_path): """Test YamlNumpyDumper dumping to a YAML file and HDF5 datasets in a directory.""" container = _create_container() - schemaview = SchemaView(INPUT_DIR / "temperature_schema.yaml") - ret = YamlHdf5Dumper().dumps(container, schemaview=schemaview, output_dir="./out") + output_yaml = tmp_path / "container_yaml_hdf5.yaml" + schemaview = SchemaView(GROUND_TRUTH_DIR / "temperature_schema.yaml") + YamlHdf5Dumper().dump(container, 
to_file=output_yaml, schemaview=schemaview) # read and compare with the expected YAML file ignoring order of keys - expected_yaml_file = INPUT_DIR / "container_yaml_hdf5.yaml" + expected_yaml_file = GROUND_TRUTH_DIR / "container_yaml_hdf5.yaml" yaml = YAML(typ="safe") - with open(expected_yaml_file) as f: - expected = yaml.load(f) # load yaml into dictionary - actual = yaml.load(ret) - assert actual == expected + with open(output_yaml) as f_actual: + with open(expected_yaml_file) as f_expected: + actual = yaml.load(f_actual) + expected = yaml.load(f_expected) + assert actual == expected + + +def test_yaml_xarray_zarr_dumper(tmp_path): + """Test YamlXarrayZarrDumper dumping to a YAML file and zarr datasets in a directory.""" + container = _create_container() + + output_yaml = tmp_path / "container_yaml_xarray_zarr.yaml" + schemaview = SchemaView(GROUND_TRUTH_DIR / "temperature_schema.yaml") + YamlXarrayZarrDumper().dump(container, to_file=output_yaml, schemaview=schemaview) + + # read and compare with the expected YAML file ignoring order of keys + expected_yaml_file = GROUND_TRUTH_DIR / "container_yaml_xarray_zarr.yaml" + yaml = YAML(typ="safe") + with open(output_yaml) as f_actual: + with open(expected_yaml_file) as f_expected: + actual = yaml.load(f_actual) + expected = yaml.load(f_expected) + assert actual == expected + + +def test_yaml_xarray_netcdf_dumper(tmp_path): + """Test YamlXarrayNetCDFDumper dumping to a YAML file and netcdf datasets in a directory.""" + container = _create_container() + + output_yaml = tmp_path / "container_yaml_xarray_netcdf.yaml" + schemaview = SchemaView(GROUND_TRUTH_DIR / "temperature_schema.yaml") + YamlXarrayNetCDFDumper().dump(container, to_file=output_yaml, schemaview=schemaview) + + # read and compare with the expected YAML file ignoring order of keys + expected_yaml_file = GROUND_TRUTH_DIR / "container_yaml_xarray_netcdf.yaml" + yaml = YAML(typ="safe") + with open(output_yaml) as f_actual: + with open(expected_yaml_file) as
f_expected: + actual = yaml.load(f_actual) + expected = yaml.load(f_expected) + assert actual == expected + + +def test_xarray_zarr_dumper(tmp_path): + container = _create_container() + schemaview = SchemaView(GROUND_TRUTH_DIR / "temperature_schema.yaml") + output_file_path = tmp_path / "my_container_xarray.zarr" + XarrayZarrDumper().dump(container, to_file=output_file_path, schemaview=schemaview) + + assert os.path.exists(output_file_path) + root = zarr.group(store=output_file_path) + assert root.attrs["name"] == "my_container" + np.testing.assert_array_equal(root["latitude_series"][:], [[1, 2], [3, 4]]) + + np.testing.assert_array_equal(root["longitude_series"][:], [[5, 6], [7, 8]]) + assert set(root["temperature_dataset"]) == set(["date", "day_in_d", "temperatures_in_K"]) + + # Below reference date seems to be added automatically when using pd.to_datetime + np.testing.assert_array_equal( + root["temperature_dataset/date"][:], np.array(['2020-01-01', '2020-01-02']) + ) + + assert root["temperature_dataset/day_in_d"].attrs["reference_date"] == '2020-01-01' + np.testing.assert_array_equal(root["temperature_dataset/day_in_d"][:], [0, 1]) + np.testing.assert_array_equal( + root["temperature_dataset/temperatures_in_K"][:], + [[[0, 1], [2, 3]], [[4, 5], [6, 7]]], + ) + assert root["temperature_dataset/temperatures_in_K"].attrs["conversion_factor"] == 1000 + + assert root["temperature_dataset"].attrs["name"] == "my_temperature" + # Check possibility of reference date being another coords with dims set to date. 
+ assert root["temperature_dataset"].attrs["latitude_in_deg"] == "my_latitude" + assert root["temperature_dataset"].attrs["longitude_in_deg"] == "my_longitude" + + +def test_xarray_netcdf_dumper(tmp_path): + container = _create_container() + schemaview = SchemaView(GROUND_TRUTH_DIR / "temperature_schema.yaml") + output_file_path = tmp_path / "my_container.nc" + XarrayNetCDFDumper().dump(container, to_file=output_file_path, schemaview=schemaview) + + assert os.path.exists(output_file_path) + datatree = open_datatree(output_file_path, engine='h5netcdf') + + assert datatree.attrs['name'] == 'my_container' + np.testing.assert_array_equal(datatree["latitude_series"].data, [[1, 2], [3, 4]]) + np.testing.assert_array_equal(datatree["longitude_series"].data, [[5, 6], [7, 8]]) + assert list(datatree["temperature_dataset"].coords.keys()) == ['date', 'day_in_d'] + + np.testing.assert_array_equal( + datatree["temperature_dataset"].coords["date"].values, np.array(["2020-01-01", "2020-01-02"]) + ) + np.testing.assert_array_equal(datatree["temperature_dataset"]["day_in_d"].values, [0, 1]) + assert datatree["temperature_dataset"]["day_in_d"].attrs["reference_date"] == '2020-01-01' + np.testing.assert_array_equal(datatree["temperature_dataset"]["temperatures_in_K"].values, + [[[0, 1], [2, 3]], [[4, 5], [6, 7]]]) + assert datatree["temperature_dataset"].data_vars["temperatures_in_K"].attrs["conversion_factor"] == 1000 + + assert datatree["temperature_dataset"].attrs["name"] == "my_temperature" + # Check possibility of reference date being another coords with dims set to date. 
+ assert datatree["temperature_dataset"].attrs["latitude_in_deg"] == "my_latitude" + assert datatree["temperature_dataset"].attrs["longitude_in_deg"] == "my_longitude" def test_hdf5_dumper(tmp_path): """Test Hdf5Dumper dumping to an HDF5 file.""" container = _create_container() - schemaview = SchemaView(INPUT_DIR / "temperature_schema.yaml") + schemaview = SchemaView(GROUND_TRUTH_DIR / "temperature_schema.yaml") output_file_path = tmp_path / "my_container.h5" - Hdf5Dumper().dumps(container, schemaview=schemaview, output_file_path=output_file_path) + Hdf5Dumper().dump(container, to_file=output_file_path, schemaview=schemaview) assert os.path.exists(output_file_path) with h5py.File(output_file_path, "r") as f: @@ -139,11 +247,9 @@ def test_zarr_directory_store_dumper(tmp_path): """Test ZarrDumper dumping to an HDF5 file.""" container = _create_container() - schemaview = SchemaView(INPUT_DIR / "temperature_schema.yaml") + schemaview = SchemaView(GROUND_TRUTH_DIR / "temperature_schema.yaml") output_file_path = tmp_path / "my_container.zarr" - ZarrDirectoryStoreDumper().dumps( - container, schemaview=schemaview, output_file_path=output_file_path - ) + ZarrDirectoryStoreDumper().dump(container, to_file=output_file_path, schemaview=schemaview) assert os.path.exists(output_file_path) diff --git a/tests/test_loaders/input/container_yaml.yaml b/tests/test_loaders/input/container_yaml.yaml new file mode 100644 index 0000000..8c1c32c --- /dev/null +++ b/tests/test_loaders/input/container_yaml.yaml @@ -0,0 +1,39 @@ +name: my_container +latitude_series: + name: my_latitude + values: + - - 1 + - 2 + - - 3 + - 4 +longitude_series: + name: my_longitude + values: + - - 5 + - 6 + - - 7 + - 8 +temperature_dataset: + name: my_temperature + latitude_in_deg: my_latitude + longitude_in_deg: my_longitude + temperatures_in_K: + conversion_factor: 1000 + values: + - - - 0 + - 1 + - - 2 + - 3 + - - - 4 + - 5 + - - 6 + - 7 + date: + values: + - "2020-01-01" + - "2020-01-02" + day_in_d: + 
values: + - 0 + - 1 + reference_date: "2020-01-01" diff --git a/tests/test_loaders/input/container_yaml_hdf5.yaml b/tests/test_loaders/input/container_yaml_hdf5.yaml new file mode 100644 index 0000000..cfe619e --- /dev/null +++ b/tests/test_loaders/input/container_yaml_hdf5.yaml @@ -0,0 +1,34 @@ +name: my_container +latitude_series: + name: my_latitude + values: + source: + - file: ./tests/test_loaders/input/yaml_hdf5_h5s/my_latitude.values.h5 + format: hdf5 +longitude_series: + name: my_longitude + values: + source: + - file: ./tests/test_loaders/input/yaml_hdf5_h5s/my_longitude.values.h5 + format: hdf5 +temperature_dataset: + date: + values: + source: + - file: ./tests/test_loaders/input/yaml_hdf5_h5s/my_temperature.date.values.h5 + format: hdf5 + day_in_d: + reference_date: '2020-01-01' + values: + source: + - file: ./tests/test_loaders/input/yaml_hdf5_h5s/my_temperature.day_in_d.values.h5 + format: hdf5 + latitude_in_deg: my_latitude + longitude_in_deg: my_longitude + name: my_temperature + temperatures_in_K: + conversion_factor: 1000.0 + values: + source: + - file: ./tests/test_loaders/input/yaml_hdf5_h5s/my_temperature.temperatures_in_K.values.h5 + format: hdf5 diff --git a/tests/test_loaders/input/container_yaml_numpy.yaml b/tests/test_loaders/input/container_yaml_numpy.yaml new file mode 100644 index 0000000..e4a26f0 --- /dev/null +++ b/tests/test_loaders/input/container_yaml_numpy.yaml @@ -0,0 +1,34 @@ +name: my_container +latitude_series: + name: my_latitude + values: + source: + - file: "./tests/test_loaders/input/yaml_numpy_npys/my_latitude.values.npy" + format: numpy +longitude_series: + name: my_longitude + values: + source: + - file: "./tests/test_loaders/input/yaml_numpy_npys/my_longitude.values.npy" + format: numpy +temperature_dataset: + date: + values: + source: + - file: "./tests/test_loaders/input/yaml_numpy_npys/my_temperature.date.values.npy" + format: numpy + day_in_d: + reference_date: '2020-01-01' + values: + source: + - file: 
"./tests/test_loaders/input/yaml_numpy_npys/my_temperature.day_in_d.values.npy" + format: numpy + latitude_in_deg: my_latitude + longitude_in_deg: my_longitude + name: my_temperature + temperatures_in_K: + conversion_factor: 1000.0 + values: + source: + - file: "./tests/test_loaders/input/yaml_numpy_npys/my_temperature.temperatures_in_K.values.npy" + format: numpy diff --git a/tests/test_loaders/input/container_yaml_xarray_netcdf.yaml b/tests/test_loaders/input/container_yaml_xarray_netcdf.yaml new file mode 100644 index 0000000..9be0753 --- /dev/null +++ b/tests/test_loaders/input/container_yaml_xarray_netcdf.yaml @@ -0,0 +1,34 @@ +name: my_container +latitude_series: + name: my_latitude + values: + source: + - file: "./tests/test_loaders/input/yaml_xarray_ncs/my_latitude.values.nc" + format: netcdf +longitude_series: + name: my_longitude + values: + source: + - file: "./tests/test_loaders/input/yaml_xarray_ncs/my_longitude.values.nc" + format: netcdf +temperature_dataset: + date: + values: + source: + - file: "./tests/test_loaders/input/yaml_xarray_ncs/my_temperature.date.values.nc" + format: netcdf + day_in_d: + reference_date: '2020-01-01' + values: + source: + - file: "./tests/test_loaders/input/yaml_xarray_ncs/my_temperature.day_in_d.values.nc" + format: netcdf + latitude_in_deg: my_latitude + longitude_in_deg: my_longitude + name: my_temperature + temperatures_in_K: + conversion_factor: 1000.0 + values: + source: + - file: "./tests/test_loaders/input/yaml_xarray_ncs/my_temperature.temperatures_in_K.values.nc" + format: netcdf diff --git a/tests/test_loaders/input/container_yaml_xarray_zarr.yaml b/tests/test_loaders/input/container_yaml_xarray_zarr.yaml new file mode 100644 index 0000000..6beb610 --- /dev/null +++ b/tests/test_loaders/input/container_yaml_xarray_zarr.yaml @@ -0,0 +1,34 @@ +name: my_container +latitude_series: + name: my_latitude + values: + source: + - file: "./tests/test_loaders/input/yaml_xarray_zarrs/my_latitude.values.zarr" + format: zarr 
+longitude_series: + name: my_longitude + values: + source: + - file: "./tests/test_loaders/input/yaml_xarray_zarrs/my_longitude.values.zarr" + format: zarr +temperature_dataset: + date: + values: + source: + - file: "./tests/test_loaders/input/yaml_xarray_zarrs/my_temperature.date.values.zarr" + format: zarr + day_in_d: + reference_date: '2020-01-01' + values: + source: + - file: "./tests/test_loaders/input/yaml_xarray_zarrs/my_temperature.day_in_d.values.zarr" + format: zarr + latitude_in_deg: my_latitude + longitude_in_deg: my_longitude + name: my_temperature + temperatures_in_K: + conversion_factor: 1000.0 + values: + source: + - file: "./tests/test_loaders/input/yaml_xarray_zarrs/my_temperature.temperatures_in_K.values.zarr" + format: zarr diff --git a/tests/test_loaders/input/my_container.h5 b/tests/test_loaders/input/my_container.h5 new file mode 100644 index 0000000..1938a66 Binary files /dev/null and b/tests/test_loaders/input/my_container.h5 differ diff --git a/tests/test_loaders/input/my_container.nc b/tests/test_loaders/input/my_container.nc new file mode 100644 index 0000000..100f811 Binary files /dev/null and b/tests/test_loaders/input/my_container.nc differ diff --git a/tests/test_loaders/input/my_container.zarr/.zattrs b/tests/test_loaders/input/my_container.zarr/.zattrs new file mode 100644 index 0000000..060eade --- /dev/null +++ b/tests/test_loaders/input/my_container.zarr/.zattrs @@ -0,0 +1,3 @@ +{ + "name": "my_container" +} \ No newline at end of file diff --git a/tests/test_loaders/input/my_container.zarr/.zgroup b/tests/test_loaders/input/my_container.zarr/.zgroup new file mode 100644 index 0000000..3b7daf2 --- /dev/null +++ b/tests/test_loaders/input/my_container.zarr/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/tests/test_loaders/input/my_container.zarr/latitude_series/.zattrs b/tests/test_loaders/input/my_container.zarr/latitude_series/.zattrs new file mode 100644 index 0000000..6b698b1 --- 
/dev/null +++ b/tests/test_loaders/input/my_container.zarr/latitude_series/.zattrs @@ -0,0 +1,3 @@ +{ + "name": "my_latitude" +} \ No newline at end of file diff --git a/tests/test_loaders/input/my_container.zarr/latitude_series/.zgroup b/tests/test_loaders/input/my_container.zarr/latitude_series/.zgroup new file mode 100644 index 0000000..3b7daf2 --- /dev/null +++ b/tests/test_loaders/input/my_container.zarr/latitude_series/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/tests/test_loaders/input/my_container.zarr/latitude_series/values/.zarray b/tests/test_loaders/input/my_container.zarr/latitude_series/values/.zarray new file mode 100644 index 0000000..fe4bc83 --- /dev/null +++ b/tests/test_loaders/input/my_container.zarr/latitude_series/values/.zarray @@ -0,0 +1,22 @@ +{ + "chunks": [ + 2, + 2 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": "