Skip to content

Commit

Permalink
Merge pull request #502 from scipp/fix-json-nexus-loader
Browse files Browse the repository at this point in the history
fix: adapt json loader to new schema
  • Loading branch information
jokasimr authored Mar 19, 2024
2 parents 0988a27 + f9e25ef commit 9764054
Show file tree
Hide file tree
Showing 10 changed files with 1,490 additions and 195 deletions.
181 changes: 65 additions & 116 deletions src/scippneutron/io/nexus/_json_nexus.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
from typing import Any, Dict, List, Tuple, Union

import numpy as np
from scippnexus.typing import H5Group

_nexus_class = "NX_class"
_nexus_units = "units"
_nexus_name = "name"
_nexus_path = "path"
_nexus_values = "values"
_nexus_dataset = "dataset"
_nexus_config = "config"
_nexus_group = "group"
_nexus_children = "children"
_nexus_link = "link"
Expand Down Expand Up @@ -87,12 +87,13 @@ def make_json_dataset(name: str, data) -> dict:
"size": data.shape,
"type": numpy_to_filewriter_type[data.dtype.type],
}

return {
"type": "dataset",
"name": name,
"values": data,
"dataset": dataset_info,
'module': _nexus_dataset,
_nexus_config: {
**dataset_info,
_nexus_name: name,
_nexus_values: data,
},
"attributes": [],
}

Expand All @@ -117,100 +118,43 @@ def _get_attribute_value(
raise MissingAttribute


class _Node(dict):
def __init__(self, parent: dict, name: str, file: dict, group: dict):
super().__init__(**group)
self.parent = parent
self.name = name
self.file = file
def _visitnodes(root: Dict):
    """Yield every descendant node of *root*, depth-first, parents before children."""
    # Explicit stack instead of recursion; children are pushed in reverse so
    # that siblings are yielded in their original order.
    stack = list(reversed(root.get(_nexus_children, ())))
    while stack:
        node = stack.pop()
        yield node
        stack.extend(reversed(node.get(_nexus_children, ())))


def _visit_nodes(
root: Dict,
group: Dict,
nx_class_names: Tuple[str, ...],
groups_with_requested_nx_class: Dict[str, List[H5Group]],
path: List[str],
):
try:
for child in group[_nexus_children]:
try:
path.append(child[_nexus_name])
except KeyError:
# If the object doesn't have a name it can't be a NeXus
# class we are looking for, nor can it be a group
# containing a NeXus class we are looking for, so skip to
# next object
continue
try:
nx_class = _get_attribute_value(child, _nexus_class)
if nx_class in nx_class_names:
groups_with_requested_nx_class[nx_class].append(
_Node(group=child, parent=group, name="/".join(path), file=root)
)
except MissingAttribute:
# It may be a group but not an NX_class,
# that's fine, continue to its children
pass
_visit_nodes(
root, child, nx_class_names, groups_with_requested_nx_class, path
)
path.pop(-1)
except KeyError:
pass
def _name(node: Dict) -> str:
    """Return the name of a JSON NeXus node, or ``''`` if it has none.

    Groups store their name at the top level of the node, while module
    nodes (datasets, links) store it inside their ``config`` object.

    Uses ``dict.get`` for the config lookup so that module nodes whose
    config carries no ``name`` (stream-module configs appear to hold only
    topic/source/dtype — TODO confirm against the file-writer schema) fall
    back to ``''`` instead of raising ``KeyError``.
    """
    if _nexus_name in node:
        return node[_nexus_name]
    if _nexus_config in node:
        # Robust against configs without a name; '' matches the
        # function's existing fallback for nameless nodes.
        return node[_nexus_config].get(_nexus_name, '')
    return ''


def contains_stream(group: Dict) -> bool:
"""
Return True if the group contains a stream object
"""
if not isinstance(group, JSONGroup):
return False
try:
for child in group._node[_nexus_children]:
try:
if child["type"] == _nexus_stream:
return True
except KeyError:
# "type" field ought to exist, but if it does
# not then assume it is not a stream
pass
except KeyError:
# "children" field may be missing, that is okay
# but means this this group cannot contain a stream
pass
return False
def _is_group(node: Dict) -> bool:
    """Return True when *node* represents a group, i.e. it has a children list."""
    return _nexus_children in node.keys()


def _find_by_type(type_name: str, root: Dict) -> List[H5Group]:
"""
Finds objects with the requested "type" value
Returns a list of objects with requested type
"""
def _is_dataset(node: Dict) -> bool:
    """Return True when *node* is a file-writer 'dataset' module node."""
    module = node.get('module')
    return module == _nexus_dataset


def _is_link(node: Dict) -> bool:
    """Return True when *node* is a file-writer 'link' module node."""
    module = node.get('module')
    return module == _nexus_link

def _visit_nodes_for_type(
obj: Dict, requested_type: str, objects_found: List[H5Group]
):
try:
for child in obj[_nexus_children]:
if child["type"] == requested_type:
objects_found.append(
_Node(
group=child,
parent=obj,
name="",
file={_nexus_children: [obj]},
)
)
_visit_nodes_for_type(child, requested_type, objects_found)
except KeyError:
# If this object does not have "children" array then go to next
pass

objects_with_requested_type: List[H5Group] = []
_visit_nodes_for_type(root, type_name, objects_with_requested_type)
def _is_stream(node: Dict) -> bool:
    """Return True when *node* is a module node that is neither a dataset nor a link.

    Any other module type is treated as a data stream.
    """
    if 'module' not in node:
        return False
    # De Morgan's form of: not (dataset or link).
    return not _is_dataset(node) and not _is_link(node)

return objects_with_requested_type

def contains_stream(group: JSONGroup) -> bool:
    """Return True if the group contains a stream object"""
    # Non-JSONGroup inputs (e.g. raw dicts) never count as containing a stream.
    if not isinstance(group, JSONGroup):
        return False
    node = group._node
    if _nexus_children not in node:
        return False
    return any(_is_stream(child) for child in node[_nexus_children])


class JSONTypeStringID:
Expand Down Expand Up @@ -274,10 +218,11 @@ def __init__(self, node: dict, *, parent=None):
self._file = parent.file if parent is not None else self
self._parent = self if parent is None else parent
self._node = node
name = _name(self._node)
if parent is None or parent.name == '/':
self._name = f'/{self._node.get(_nexus_name, "")}'
self._name = f'/{name}'
else:
self._name = f'{parent.name}/{self._node[_nexus_name]}'
self._name = f'{parent.name}/{name}'

@property
def attrs(self) -> JSONAttributeManager:
Expand All @@ -300,9 +245,14 @@ class JSONDataset(JSONNode):
@property
def dtype(self) -> str:
try:
dtype = self._node[_nexus_dataset]["type"]
dtype = self._node[_nexus_config]["type"]
except KeyError:
dtype = self._node[_nexus_dataset]["dtype"]
if "dtype" not in self._node[_nexus_config] and isinstance(
self._node[_nexus_config][_nexus_values], str
):
dtype = 'string'
else:
dtype = self._node[_nexus_config]["dtype"]
if dtype == 'string':
return np.dtype(str)
return np.dtype(dtype)
Expand All @@ -313,10 +263,10 @@ def ndim(self) -> int:

@property
def shape(self):
return np.asarray(self._node[_nexus_values]).shape
return np.asarray(self._node[_nexus_config][_nexus_values]).shape

def __getitem__(self, index):
return np.asarray(self._node[_nexus_values])[index]
return np.asarray(self._node[_nexus_config][_nexus_values])[index]

def read_direct(self, buf, source_sel):
buf[...] = self[source_sel]
Expand All @@ -337,16 +287,15 @@ def keys(self) -> List[str]:
if contains_stream(self):
return []
children = self._node[_nexus_children]
return [child[_nexus_name] for child in children if not contains_stream(child)]
return [_name(child) for child in children if not contains_stream(child)]

def items(self) -> List[Tuple[str, JSONNode]]:
return [(key, self[key]) for key in self.keys()]

def _as_group_or_dataset(self, item, parent):
if item['type'] == _nexus_group:
if _is_group(item):
return JSONGroup(item, parent=parent)
else:
return JSONDataset(item, parent=parent)
return JSONDataset(item, parent=parent)

def __getitem__(self, name: str) -> Union[JSONDataset, JSONGroup]:
if name.startswith('/') and name.count('/') == 1:
Expand All @@ -357,11 +306,11 @@ def __getitem__(self, name: str) -> Union[JSONDataset, JSONGroup]:
parent = self

for child in parent._node[_nexus_children]:
if child.get(_nexus_name) != name.split('/')[-1]:
if _name(child) != name.split('/')[-1]:
continue
if child.get('type') == _nexus_link:
return self[child["target"]]
if child.get('type') in (_nexus_dataset, _nexus_group):
if _is_link(child):
return self[child[_nexus_config]["target"]]
if _is_group(child) or _is_dataset(child):
return self._as_group_or_dataset(child, parent)

raise KeyError(f"Unable to open object (object '{name}' doesn't exist)")
Expand All @@ -371,12 +320,10 @@ def __iter__(self):

def visititems(self, callable):
def skip(node):
return node['type'] == _nexus_link or contains_stream(self)
return _is_link(node) or contains_stream(self)

children = [
child[_nexus_name]
for child in self._node[_nexus_children]
if not skip(child)
_name(child) for child in self._node[_nexus_children] if not skip(child)
]
for key in children:
item = self[key]
Expand Down Expand Up @@ -409,28 +356,30 @@ class StreamInfo:


def get_streams_info(root: Dict) -> List[StreamInfo]:
found_streams = _find_by_type(_nexus_stream, root)
found_streams = [node for node in _visitnodes(root) if _is_stream(node)]
streams = []
for stream in found_streams:
try:
dtype = _filewriter_to_supported_numpy_dtype[stream["stream"]["dtype"]]
dtype = _filewriter_to_supported_numpy_dtype[stream[_nexus_config]["dtype"]]
except KeyError:
try:
dtype = _filewriter_to_supported_numpy_dtype[stream["stream"]["type"]]
dtype = _filewriter_to_supported_numpy_dtype[
stream[_nexus_config]["type"]
]
except KeyError:
dtype = None

units = "dimensionless"
try:
units = _get_attribute_value(stream.parent, _nexus_units)
units = _get_attribute_value(stream, _nexus_units)
except MissingAttribute:
pass

streams.append(
StreamInfo(
stream["stream"]["topic"],
stream["stream"]["writer_module"],
stream["stream"]["source"],
stream[_nexus_config]["topic"],
stream["module"],
stream[_nexus_config]["source"],
dtype,
units,
)
Expand Down
20 changes: 20 additions & 0 deletions tests/io/json_nexus_examples/array_dataset.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"module": "dataset",
"config": {
"name": "slit_edges",
"values": [
0.0,
15.0,
180.0,
195.0
],
"type": "double"
},
"attributes": [
{
"name": "units",
"dtype": "string",
"values": "deg"
}
]
}
8 changes: 8 additions & 0 deletions tests/io/json_nexus_examples/dataset.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"module": "dataset",
"config": {
"name": "name",
"values": "YMIR",
"type": "string"
}
}
18 changes: 9 additions & 9 deletions tests/io/json_nexus_examples/detector.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
"name": "detector_0",
"children": [
{
"type": "dataset",
"name": "detector_number",
"values": [
1,
2,
3,
4
],
"dataset": {
"module": "dataset",
"config": {
"name": "detector_number",
"values": [
1,
2,
3,
4
],
"size": [
4
],
Expand Down
11 changes: 5 additions & 6 deletions tests/io/json_nexus_examples/entry.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@
"name": "entry",
"children": [
{
"type": "dataset",
"name": "title",
"values": "my experiment",
"dataset": {
"module": "dataset",
"config": {
"name": "title",
"values": "my experiment",
"string_size": 13,
"type": "string"
},
"attributes": []
}
}
],
"attributes": [
Expand Down
Loading

0 comments on commit 9764054

Please sign in to comment.