Skip to content

Commit 3e3cb29

Browse files
mattclarkeg5t
and authored
Add da00 dataarray (#97)
* Fixed import order? * Accept formatting changes * [Refactor] rename hm01 to hm00, since that draft has been dropped * [Add] draft schema: da00 and hm00 - hm00 was designed to combine features from hs00 and ADAr, including labeled histogram axes and histogram signal errors. Ultimately its implementation is likely overly complex and too specialized. - da00 is a simplified form of hm00: all arrays are handled the same way, so that, e.g., the 'signal' histogram, its 'errors' and all of its axes vectors are stored in one list of Variable objects. In addition to making the serialise/deserialise methods simpler, this adds flexibility to what can be stored in the buffer. For example, the axes do not _need_ to be bin-edges _nor_ vectors. The hope is that these objects can be passed through HDF5/NeXus structures more-easily into scipp data structures. * [Ref.] minor changes, following recent ADAr changes * [Refactor] Use new da00 schema * Remove (writer) config-related structure from da00 * Update generated da00 flatbuffer files The da00 no longer requires `axes` names on Variables, and uses a signed integer for timestamps. These changes have no immediate impact on the python serializer and deserializer methods. * [Remove] dropped draft hm00 schema * [Remove] straggler hm00 test file * Update streaming_data_types/dataarray_da00.py * Update streaming_data_types/dataarray_da00.py * use unix timestamps * use np * added test for no data * remove typing that isn't supported in 3.8 * missed some typing * fixed typing for reals * Update README.md --------- Co-authored-by: Gregory Tucker <[email protected]>
1 parent bff89dc commit 3e3cb29

File tree

8 files changed

+830
-0
lines changed

8 files changed

+830
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ https://github.com/ess-dmsc/streaming-data-types
3131
| senv | **Deprecated** |
3232
| json | Generic JSON data |
3333
| se00 | Arrays with optional timestamps, for example waveform data. Replaces _senv_. |
34+
| da00 | Scipp-like data arrays, for histograms, etc. |
3435

3536
### hs00 and hs01
3637
Schema for histogram data. It is one of the more complicated to use schemas.

streaming_data_types/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from streaming_data_types.area_detector_ADAr import deserialise_ADAr, serialise_ADAr
66
from streaming_data_types.area_detector_NDAr import deserialise_ndar, serialise_ndar
77
from streaming_data_types.array_1d_se00 import deserialise_se00, serialise_se00
8+
from streaming_data_types.dataarray_da00 import deserialise_da00, serialise_da00
89
from streaming_data_types.epics_connection_ep01 import deserialise_ep01, serialise_ep01
910
from streaming_data_types.epics_connection_info_ep00 import (
1011
deserialise_ep00,
@@ -60,6 +61,7 @@
6061
"al00": serialise_al00,
6162
"json": serialise_json,
6263
"ad00": serialise_ad00,
64+
"da00": serialise_da00,
6365
}
6466

6567

@@ -88,4 +90,5 @@
8890
"al00": deserialise_al00,
8991
"json": deserialise_json,
9092
"ad00": deserialise_ad00,
93+
"da00": deserialise_da00,
9194
}
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
from dataclasses import dataclass
2+
from struct import pack
3+
from typing import List, NamedTuple, Tuple, Union
4+
5+
import flatbuffers
6+
import numpy as np
7+
8+
import streaming_data_types.fbschemas.dataarray_da00.da00_Variable as VariableBuffer
9+
from streaming_data_types.fbschemas.dataarray_da00 import da00_DataArray
10+
from streaming_data_types.fbschemas.dataarray_da00.da00_dtype import da00_dtype
11+
from streaming_data_types.utils import check_schema_identifier
12+
13+
FILE_IDENTIFIER = b"da00"
14+
15+
16+
def get_dtype(data: Union[np.ndarray, str, float, int]):
    """Map a value onto the matching ``da00_dtype`` enum entry.

    Numpy arrays are mapped by their dtype, strings become ``c_string``,
    and bare python scalars become ``float64``/``int64``.

    Raises RuntimeError for unsupported python types; an unsupported
    numpy dtype raises KeyError from the lookup.
    """
    if isinstance(data, np.ndarray):
        supported = (
            "int8",
            "int16",
            "int32",
            "int64",
            "uint8",
            "uint16",
            "uint32",
            "uint64",
            "float32",
            "float64",
        )
        # The enum members share their names with the numpy dtypes.
        lookup = {np.dtype(name): getattr(da00_dtype, name) for name in supported}
        return lookup[data.dtype]
    if isinstance(data, str):
        return da00_dtype.c_string
    if isinstance(data, float):
        return da00_dtype.float64
    if isinstance(data, int):
        return da00_dtype.int64
    raise RuntimeError(f"Unsupported data type {type(data)} in get_dtype")
41+
42+
43+
def to_buffer(data: Union[np.ndarray, str, float, int]):
    """Return *data* as a numpy array suitable for the flatbuffer payload.

    Arrays pass through untouched; strings and scalars are converted to a
    flat uint8 view of their encoded bytes (scalars use native-order
    ``q``/``d`` packing).

    Raises RuntimeError for unsupported python types.
    """
    if isinstance(data, np.ndarray):
        return data
    if isinstance(data, str):
        raw = data.encode()
    elif isinstance(data, int):
        # NOTE: checked before float, so bool/int both take this branch.
        raw = pack("q", data)
    elif isinstance(data, float):
        raw = pack("d", data)
    else:
        raise RuntimeError(f"Unsupported data type {type(data)} in to_buffer")
    return np.frombuffer(raw, np.uint8)
53+
54+
55+
def from_buffer(fb_array) -> Union[np.ndarray, str]:
    """Convert a buffered da00 Variable's payload back to python data.

    Returns the decoded ``str`` for ``c_string`` payloads, otherwise a
    numpy view of the raw bytes reinterpreted as the buffered dtype.
    (The annotation previously claimed ``np.ndarray`` only, but the
    c_string branch returns ``str``.)
    """
    raw_data = fb_array.DataAsNumpy()
    type_map = {
        d: np.dtype(x)
        for x, d in (
            ("int8", da00_dtype.int8),
            ("int16", da00_dtype.int16),
            ("int32", da00_dtype.int32),
            ("int64", da00_dtype.int64),
            ("uint8", da00_dtype.uint8),
            ("uint16", da00_dtype.uint16),
            ("uint32", da00_dtype.uint32),
            ("uint64", da00_dtype.uint64),
            ("float32", da00_dtype.float32),
            ("float64", da00_dtype.float64),
        )
    }
    dtype = fb_array.DataType()
    if da00_dtype.c_string == dtype:
        return raw_data.tobytes().decode()
    # Reuse the dtype fetched above rather than calling DataType() again.
    return raw_data.view(type_map[dtype])
77+
78+
79+
def create_optional_string(builder, string: Union[str, None]):
    """Intern *string* in *builder*, or pass None through for optional fields."""
    if string is None:
        return None
    return builder.CreateString(string)
81+
82+
83+
@dataclass
class Variable:
    """One named array in a da00 DataArray message.

    In this schema the signal histogram, its errors and its axis values are
    all stored uniformly as Variables.  ``data`` is usually a numpy array or
    a string; ``unpack`` may also yield a bare python scalar for buffered
    1-element non-string data.
    """

    name: str
    data: Union[np.ndarray, str]
    # Dimension names for each axis of ``data``; [] for scalars/strings.
    axes: Union[List[str], None] = None
    # Shape as stored in the buffer; derived from ``data`` when omitted.
    shape: Union[Tuple[int, ...], None] = None
    unit: Union[str, None] = None
    label: Union[str, None] = None
    source: Union[str, None] = None

    def __post_init__(self):
        # Calculate the shape when used, e.g., interactively
        # -- but allow to read it back from the buffered object too
        if self.axes is None:
            self.axes = []
        if self.shape is None:
            self.shape = to_buffer(self.data).shape

    def __eq__(self, other):
        """Field-wise equality; array ``data`` is compared element-wise."""
        if not isinstance(other, Variable):
            return False
        same_data = type(self.data) == type(other.data)  # noqa: E721
        if isinstance(self.data, np.ndarray):
            same_data &= np.array_equal(self.data, other.data)
        else:
            same_data &= self.data == other.data
        same_axes = len(self.axes) == len(other.axes) and all(
            a == b for a, b in zip(self.axes, other.axes)
        )
        return (
            same_data
            and same_axes
            and self.name == other.name
            and self.unit == other.unit
            and self.label == other.label
            and self.source == other.source
            and self.shape == other.shape
        )

    def pack(self, builder):
        """Write this Variable into *builder* and return its table offset.

        All nested objects (strings, the shape/data/axes vectors) are
        created *before* VariableBuffer.Start is called.
        """
        source_offset = create_optional_string(builder, self.source)
        label_offset = create_optional_string(builder, self.label)
        unit_offset = create_optional_string(builder, self.unit)
        name_offset = builder.CreateString(self.name)
        buf = to_buffer(self.data)
        shape_offset = builder.CreateNumpyVector(np.asarray(buf.shape))
        # The payload is always serialised as flat bytes; DataType records
        # how to reinterpret it on the way out.
        data_offset = builder.CreateNumpyVector(buf.flatten().view(np.uint8))

        temp_axes = [builder.CreateString(x) for x in self.axes]
        VariableBuffer.StartAxesVector(builder, len(temp_axes))
        # Vector elements are prepended, hence the reversed iteration.
        for dim in reversed(temp_axes):
            builder.PrependUOffsetTRelative(dim)
        axes_offset = builder.EndVector()

        VariableBuffer.Start(builder)
        VariableBuffer.AddName(builder, name_offset)
        # Optional fields are simply omitted from the table when None.
        if unit_offset is not None:
            VariableBuffer.AddUnit(builder, unit_offset)
        if label_offset is not None:
            VariableBuffer.AddLabel(builder, label_offset)
        if source_offset is not None:
            VariableBuffer.AddSource(builder, source_offset)
        VariableBuffer.AddDataType(builder, get_dtype(self.data))
        VariableBuffer.AddAxes(builder, axes_offset)
        VariableBuffer.AddShape(builder, shape_offset)
        VariableBuffer.AddData(builder, data_offset)
        return VariableBuffer.End(builder)

    @classmethod
    def unpack(cls, b: VariableBuffer):
        """Reconstruct a Variable from a buffered da00_Variable table."""
        data = from_buffer(b)
        axes = [b.Axes(i).decode() for i in range(b.AxesLength())]
        if len(axes):
            # Dimensioned data: restore the buffered shape.
            data = data.reshape(b.ShapeAsNumpy())
        elif b.DataType() != da00_dtype.c_string and np.prod(data.shape) == 1:
            # Axis-less single values come back as python scalars.
            data = data.item()

        unit = None if b.Unit() is None else b.Unit().decode()
        label = None if b.Label() is None else b.Label().decode()
        source = None if b.Source() is None else b.Source().decode()
        name = b.Name().decode()
        # the buffered shape is NOT the shape of the numpy array in all cases
        buffered_shape = tuple(b.ShapeAsNumpy())
        return cls(
            name=name,
            unit=unit,
            label=label,
            source=source,
            axes=axes,
            data=data,
            shape=buffered_shape,
        )
175+
176+
177+
def insert_variable_list(starter, builder, objects: List[Variable]):
    """Pack *objects* into *builder* and collect them in a flatbuffer vector.

    *starter* is the generated Start...Vector function for the target field.
    Returns the offset of the finished vector.
    """
    offsets = [variable.pack(builder) for variable in objects]
    starter(builder, len(offsets))
    # Flatbuffer vectors are built by prepending, so walk back to front.
    for offset in reversed(offsets):
        builder.PrependUOffsetTRelative(offset)
    return builder.EndVector()
183+
184+
185+
def serialise_da00(
    source_name: str,
    timestamp_ns: int,
    data: List[Variable],
) -> bytes:
    """Serialise *data* as a da00 DataArray flatbuffer message.

    Raises RuntimeError if *data* is empty, since the message must carry
    at least one Variable.
    """
    if not data:
        raise RuntimeError("data must contain at least one Variable")
    fbb = flatbuffers.Builder(1024)
    fbb.ForceDefaults(True)

    # Nested objects (Variable vector, source-name string) must be written
    # before the DataArray table itself is started.
    variables_offset = insert_variable_list(da00_DataArray.StartDataVector, fbb, data)
    name_offset = fbb.CreateString(source_name)

    # Assemble the top-level table.
    da00_DataArray.Start(fbb)
    da00_DataArray.AddSourceName(fbb, name_offset)
    da00_DataArray.AddTimestamp(fbb, timestamp_ns)
    da00_DataArray.AddData(fbb, variables_offset)
    message = da00_DataArray.End(fbb)

    fbb.Finish(message, file_identifier=FILE_IDENTIFIER)
    return bytes(fbb.Output())
207+
208+
209+
# Deserialised representation of a da00 message; the field order matches
# the serialise_da00 arguments.  (Functional NamedTuple form keeps the
# runtime typename "da00_DataArray".)
da00_DataArray_t = NamedTuple(
    "da00_DataArray",
    [
        ("source_name", str),
        ("timestamp_ns", int),
        ("data", List[Variable]),
    ],
)
217+
218+
219+
def deserialise_da00(buffer: Union[bytearray, bytes]) -> da00_DataArray_t:
    """Deserialise a da00 flatbuffer message.

    Returns a ``da00_DataArray_t`` named tuple holding the source name,
    the timestamp and the unpacked list of Variables.  (The previous
    annotation referenced the imported fbschema module ``da00_DataArray``,
    not the named tuple actually returned.)

    check_schema_identifier raises if *buffer* does not carry the "da00"
    file identifier.
    """
    check_schema_identifier(buffer, FILE_IDENTIFIER)

    da00 = da00_DataArray.da00_DataArray.GetRootAs(buffer, offset=0)
    data = [Variable.unpack(da00.Data(j)) for j in range(da00.DataLength())]

    return da00_DataArray_t(
        source_name=da00.SourceName().decode(),
        timestamp_ns=da00.Timestamp(),
        data=data,
    )

streaming_data_types/fbschemas/dataarray_da00/__init__.py

Whitespace-only changes.
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# automatically generated by the FlatBuffers compiler, do not modify
2+
3+
# namespace:
4+
5+
import flatbuffers
6+
from flatbuffers.compat import import_numpy
7+
8+
np = import_numpy()
9+
10+
11+
class da00_DataArray(object):
    """Flatbuffers read accessor for a serialised da00 DataArray table.

    Generated by the FlatBuffers compiler — do not modify the logic.
    Field vtable offsets: 4 = source_name, 6 = timestamp, 8 = data vector.
    """

    __slots__ = ["_tab"]

    @classmethod
    def GetRootAs(cls, buf, offset=0):
        """Initialise an accessor positioned at the buffer's root table."""
        n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
        x = da00_DataArray()
        x.Init(buf, n + offset)
        return x

    @classmethod
    def GetRootAsda00_DataArray(cls, buf, offset=0):
        """This method is deprecated. Please switch to GetRootAs."""
        return cls.GetRootAs(buf, offset)

    @classmethod
    def da00_DataArrayBufferHasIdentifier(cls, buf, offset, size_prefixed=False):
        # b"\x64\x61\x30\x30" is the ASCII file identifier "da00".
        return flatbuffers.util.BufferHasIdentifier(
            buf, offset, b"\x64\x61\x30\x30", size_prefixed=size_prefixed
        )

    # da00_DataArray
    def Init(self, buf, pos):
        self._tab = flatbuffers.table.Table(buf, pos)

    # da00_DataArray
    def SourceName(self):
        """Return the source name as bytes, or None if absent."""
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
        if o != 0:
            return self._tab.String(o + self._tab.Pos)
        return None

    # da00_DataArray
    def Timestamp(self):
        """Return the int64 timestamp, defaulting to 0 if absent."""
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
        if o != 0:
            return self._tab.Get(flatbuffers.number_types.Int64Flags, o + self._tab.Pos)
        return 0

    # da00_DataArray
    def Data(self, j):
        """Return the j-th da00_Variable in the data vector, or None."""
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        if o != 0:
            x = self._tab.Vector(o)
            # Each vector element is a 4-byte offset to a Variable table.
            x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
            x = self._tab.Indirect(x)
            from streaming_data_types.fbschemas.dataarray_da00.da00_Variable import (
                da00_Variable,
            )

            obj = da00_Variable()
            obj.Init(self._tab.Bytes, x)
            return obj
        return None

    # da00_DataArray
    def DataLength(self):
        """Return the number of elements in the data vector (0 if absent)."""
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        if o != 0:
            return self._tab.VectorLen(o)
        return 0

    # da00_DataArray
    def DataIsNone(self):
        """Return True when the data vector field is not present at all."""
        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        return o == 0
77+
78+
79+
# Builder helpers generated by the FlatBuffers compiler.  The short-named
# wrappers (Start/Add*/End) are the modern API; the long da00_DataArray*
# forms are kept for backwards compatibility.  Slot indices: 0 = source
# name, 1 = timestamp, 2 = data vector.
def da00_DataArrayStart(builder):
    # 3 fields in the table's vtable.
    builder.StartObject(3)


def Start(builder):
    return da00_DataArrayStart(builder)


def da00_DataArrayAddSourceName(builder, sourceName):
    builder.PrependUOffsetTRelativeSlot(
        0, flatbuffers.number_types.UOffsetTFlags.py_type(sourceName), 0
    )


def AddSourceName(builder, sourceName):
    return da00_DataArrayAddSourceName(builder, sourceName)


def da00_DataArrayAddTimestamp(builder, timestamp):
    # int64 slot with default 0.
    builder.PrependInt64Slot(1, timestamp, 0)


def AddTimestamp(builder, timestamp):
    return da00_DataArrayAddTimestamp(builder, timestamp)


def da00_DataArrayAddData(builder, data):
    builder.PrependUOffsetTRelativeSlot(
        2, flatbuffers.number_types.UOffsetTFlags.py_type(data), 0
    )


def AddData(builder, data):
    return da00_DataArrayAddData(builder, data)


def da00_DataArrayStartDataVector(builder, numElems):
    # Element size 4 (table offsets), alignment 4.
    return builder.StartVector(4, numElems, 4)


def StartDataVector(builder, numElems):
    return da00_DataArrayStartDataVector(builder, numElems)


def da00_DataArrayEnd(builder):
    return builder.EndObject()


def End(builder):
    return da00_DataArrayEnd(builder)

0 commit comments

Comments
 (0)