
Commit 45c0dc4

test: adapted variable names, functions, and comments to the new "column" terminology in tests
1 parent 16cca67 commit 45c0dc4
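
At a glance, the renames applied consistently across both test files:

- assign_feature_value -> assign_value_to_column
- get_feature_from_ds -> get_column_from_ds
- feature_key argument -> column_name
- "new_feature" test column -> "new_column"
- dummy_keys / output_keys / features_* locals -> dummy_columns / output_columns / columns_*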

File tree

2 files changed: +75 -75 lines

  tests/tests_tensorflow/datasets/test_tf_datahandler.py
  tests/tests_torch/datasets/test_torch_datahandler.py

tests/tests_tensorflow/datasets/test_tf_datahandler.py (+38 -38)
@@ -32,26 +32,26 @@
 from tests.tests_tensorflow import generate_data_tf
 
 
-def assign_feature_value(
-    dataset: tf.data.Dataset, feature_key: str, value: int
+def assign_value_to_column(
+    dataset: tf.data.Dataset, column_name: str, value: int
 ) -> tf.data.Dataset:
-    """Assign a value to a feature for every sample in a tf.data.Dataset
+    """Assign a value to a column for every sample in a tf.data.Dataset
 
     Args:
         dataset (tf.data.Dataset): tf.data.Dataset to assign the value to
-        feature_key (str): Feature to assign the value to
+        column_name (str): Column to assign the value to
         value (int): Value to assign
 
     Returns:
         tf.data.Dataset
     """
     assert isinstance(dataset.element_spec, dict), "dataset elements must be dicts"
 
-    def assign_value_to_feature(x):
-        x[feature_key] = value
+    def assign_value_to_column(x):
+        x[column_name] = value
         return x
 
-    dataset = dataset.map(assign_value_to_feature)
+    dataset = dataset.map(assign_value_to_column)
     return dataset
 
 
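For illustration only (not part of this commit): a minimal sketch of how the renamed helper behaves on a dict-structured tf.data.Dataset. The toy dataset and the "new_column" name below are hypothetical; "image" and "label" mirror the fixtures used in these tests.

import tensorflow as tf

# Copy of the renamed helper above, trimmed to its core logic.
def assign_value_to_column(dataset, column_name, value):
    def _assign(x):
        x[column_name] = value
        return x
    return dataset.map(_assign)

# Toy dict-structured dataset with four samples.
ds = tf.data.Dataset.from_tensor_slices(
    {"image": tf.zeros((4, 2, 2)), "label": tf.range(4)}
)
ds = assign_value_to_column(ds, "new_column", 7)
print(next(iter(ds))["new_column"])  # tf.Tensor(7, shape=(), dtype=int32)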
@@ -72,24 +72,24 @@ def get_dataset_length(dataset: tf.data.Dataset) -> int:
     return int(cardinality)
 
 
-def get_feature_from_ds(dataset: tf.data.Dataset, feature_key: str) -> np.ndarray:
-    """Get a feature from a tf.data.Dataset
+def get_column_from_ds(dataset: tf.data.Dataset, column_name: str) -> np.ndarray:
+    """Get a column from a tf.data.Dataset
 
     !!! note
         This function can be a bit time consuming since it needs to iterate
         over the whole dataset.
 
     Args:
-        dataset (tf.data.Dataset): tf.data.Dataset to get the feature from
-        feature_key (str): Feature value to get
+        dataset (tf.data.Dataset): tf.data.Dataset to get the column from
+        column_name (str): Column value to get
 
     Returns:
-        np.ndarray: Feature values for dataset
+        np.ndarray: Column values for dataset
     """
-    features = dataset.map(lambda x: x[feature_key])
-    features = list(features.as_numpy_iterator())
-    features = np.array(features)
-    return features
+    columns = dataset.map(lambda x: x[column_name])
+    columns = list(columns.as_numpy_iterator())
+    columns = np.array(columns)
+    return columns
 
 
 def test_instanciate_tf_datahandler():
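Again purely illustrative, continuing the toy dataset from the previous sketch: extracting a column with the same steps get_column_from_ds performs internally.

import numpy as np

labels = ds.map(lambda x: x["label"])                # select one column per sample
labels = np.array(list(labels.as_numpy_iterator()))  # materialize as a NumPy array
print(labels)                                        # [0 1 2 3]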
@@ -179,11 +179,11 @@ def test_load_tensorflow_datasets(dataset_name, train):
     # dummy item
     for item in dataset.take(1):
         dummy_item = item
-    dummy_keys = list(dummy_item.keys())
+    dummy_columns = list(dummy_item.keys())
     dummy_shapes = [v.shape for v in dummy_item.values()]
 
-    # check keys
-    assert list(dataset.element_spec.keys()) == dummy_keys == ["image", "label"]
+    # check columns
+    assert list(dataset.element_spec.keys()) == dummy_columns == ["image", "label"]
 
     # check output shape
     assert (
@@ -230,18 +230,18 @@ def test_load_arrays_and_custom(x_shape, num_labels, num_samples, one_hot):
     for dataset_id in [tuple_np, dict_np, tuple_tf, dict_tf, tensor_ds_tf]:
         ds = handler.load_dataset(dataset_id, columns=["key_a", "key_b"])
 
-        # check registered keys, shapes
-        output_keys = list(ds.element_spec.keys())
+        # check registered columns, shapes
+        output_columns = list(ds.element_spec.keys())
         output_shapes = [ds.element_spec[key].shape for key in ds.element_spec.keys()]
-        assert output_keys == ["key_a", "key_b"]
+        assert output_columns == ["key_a", "key_b"]
         assert output_shapes == [
            tf.TensorShape(x_shape),
            tf.TensorShape([num_labels] if one_hot else []),
        ]
-        # check item keys, shapes
+        # check item columns, shapes
         for item in ds.take(1):
            dummy_item = item
-        assert list(dummy_item.keys()) == output_keys
+        assert list(dummy_item.keys()) == output_columns
         assert list(map(lambda x: x.shape, dummy_item.values())) == output_shapes
 
 
@@ -276,29 +276,29 @@ def test_data_handler_full_pipeline(x_shape, num_samples, num_labels, one_hot):
     num_samples_b = get_dataset_length(dataset_b)
     assert num_samples == (num_samples_a + num_samples_b)
 
-    # assign feature, map, get feature
+    # assign column, map, get column
     def map_fn_a(item):
-        item["new_feature"] -= 3
+        item["new_column"] -= 3
         return item
 
     def map_fn_b(item):
-        item["new_feature"] = item["new_feature"] * 3 + 2
+        item["new_column"] = item["new_column"] * 3 + 2
         return item
 
-    dataset_a = assign_feature_value(dataset_a, "new_feature", 0)
+    dataset_a = assign_value_to_column(dataset_a, "new_column", 0)
     dataset_a = dataset_a.map(map_fn_a)
-    features_a = tf.convert_to_tensor(get_feature_from_ds(dataset_a, "new_feature"))
-    assert tf.reduce_all(features_a == tf.convert_to_tensor([-3] * num_samples_a))
+    columns_a = tf.convert_to_tensor(get_column_from_ds(dataset_a, "new_column"))
+    assert tf.reduce_all(columns_a == tf.convert_to_tensor([-3] * num_samples_a))
 
-    dataset_b = assign_feature_value(dataset_b, "new_feature", 1)
+    dataset_b = assign_value_to_column(dataset_b, "new_column", 1)
     dataset_b = dataset_b.map(map_fn_b)
-    features_b = tf.convert_to_tensor(get_feature_from_ds(dataset_b, "new_feature"))
-    assert tf.reduce_all(features_b == tf.convert_to_tensor([5] * num_samples_b))
+    columns_b = tf.convert_to_tensor(get_column_from_ds(dataset_b, "new_column"))
+    assert tf.reduce_all(columns_b == tf.convert_to_tensor([5] * num_samples_b))
 
     # concatenate two sub datasets
     dataset_c = handler.merge(dataset_a, dataset_b)
-    features_c = tf.convert_to_tensor(get_feature_from_ds(dataset_c, "new_feature"))
-    assert tf.reduce_all(features_c == tf.concat([features_a, features_b], axis=0))
+    columns_c = tf.convert_to_tensor(get_column_from_ds(dataset_c, "new_column"))
+    assert tf.reduce_all(columns_c == tf.concat([columns_a, columns_b], axis=0))
 
     # prepare dataloader
     loader = handler.prepare(dataset_c, 64, shuffle=True)
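A quick sanity check on the constants asserted above (illustration, not part of the commit): dataset_a's column is seeded with 0 and map_fn_a subtracts 3; dataset_b's is seeded with 1 and map_fn_b applies *3 + 2.

assert 0 - 3 == -3      # map_fn_a on dataset_a's seeded value
assert 1 * 3 + 2 == 5   # map_fn_b on dataset_b's seeded value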
@@ -406,13 +406,13 @@ def test_split_by_class(in_labels, out_labels, one_hot, expected_output):
     len_inds = get_dataset_length(in_dataset)
     len_outds = get_dataset_length(out_dataset)
 
-    classes = get_feature_from_ds(dataset, "label")
+    classes = get_column_from_ds(dataset, "label")
     classes = np.unique(classes, axis=0)
 
-    classes_in = get_feature_from_ds(in_dataset, "label")
+    classes_in = get_column_from_ds(in_dataset, "label")
     classes_in = np.unique(classes_in, axis=0)
 
-    classes_out = get_feature_from_ds(out_dataset, "label")
+    classes_out = get_column_from_ds(out_dataset, "label")
     classes_out = np.unique(classes_out, axis=0)
 
     assert len_ds == expected_output[0]

tests/tests_torch/datasets/test_torch_datahandler.py (+37 -37)
@@ -36,14 +36,14 @@
 from tests.tests_torch import generate_data_torch
 
 
-def assign_feature_value(
-    dataset: DictDataset, feature_key: str, value: int
+def assign_value_to_column(
+    dataset: DictDataset, column_name: str, value: int
 ) -> DictDataset:
-    """Assign a value to a feature for every sample in a DictDataset
+    """Assign a value to a column for every sample in a DictDataset
 
     Args:
         dataset (DictDataset): DictDataset to assign the value to
-        feature_key (str): Feature to assign the value to
+        column_name (str): Column to assign the value to
         value (int): Value to assign
 
     Returns:
@@ -53,11 +53,11 @@ def assign_feature_value(
         dataset, DictDataset
     ), "Dataset must be an instance of DictDataset"
 
-    def assign_value_to_feature(x):
-        x[feature_key] = torch.tensor(value)
+    def assign_value(x):
+        x[column_name] = torch.tensor(value)
         return x
 
-    dataset = dataset.map(assign_value_to_feature)
+    dataset = dataset.map(assign_value)
     return dataset
 
 
@@ -74,24 +74,24 @@ def get_dataset_length(dataset: Dataset) -> int:
 
 
 @dict_only_ds
-def get_feature_from_ds(dataset: DictDataset, feature_key: str) -> np.ndarray:
-    """Get a feature from a DictDataset
+def get_column_from_ds(dataset: DictDataset, column_name: str) -> np.ndarray:
+    """Get a column from a DictDataset
 
     !!! note
         This function can be a bit time consuming since it needs to iterate
         over the whole dataset.
 
     Args:
-        dataset (DictDataset): Dataset to get the feature from
-        feature_key (str): Feature value to get
+        dataset (DictDataset): Dataset to get the column from
+        column_name (str): Column value to get
 
     Returns:
-        np.ndarray: Feature values for dataset
+        np.ndarray: Column values for dataset
     """
 
-    features = dataset.map(lambda x: x[feature_key])
-    features = np.stack([f.numpy() for f in features])
-    return features
+    columns = dataset.map(lambda x: x[column_name])
+    columns = np.stack([f.numpy() for f in columns])
+    return columns
 
 
 def test_get_item_length():
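Illustration only: DictDataset belongs to the library under test, so the tiny stand-in class below is hypothetical. It exists solely to show the map-then-stack contract that the torch get_column_from_ds above relies on (map yields one value per sample, which the helper stacks into a NumPy array).

import numpy as np
import torch

class TinyDictDataset:
    """Hypothetical stand-in for DictDataset; only map() matters here."""
    def __init__(self, items):
        self.items = items                       # list of dicts of tensors
    def map(self, fn):
        return [fn(dict(x)) for x in self.items]

ds = TinyDictDataset([{"label": torch.tensor(i)} for i in range(4)])
columns = ds.map(lambda x: x["label"])           # same steps as get_column_from_ds
labels = np.stack([f.numpy() for f in columns])  # -> array([0, 1, 2, 3])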
@@ -178,11 +178,11 @@ def test_instanciate_from_torchvision(dataset_name, train, erase_after_test=True
 
     # dummy item
     dummy_item = dataset[0]
-    dummy_keys = list(dummy_item.keys())
+    dummy_columns = list(dummy_item.keys())
     dummy_shapes = [v.shape for v in dummy_item.values()]
 
-    # check keys
-    assert dataset.columns == dummy_keys == ["input", "label"]
+    # check columns
+    assert dataset.columns == dummy_columns == ["input", "label"]
 
     # check output shape
     assert (
@@ -226,17 +226,17 @@ def test_load_arrays_and_custom(x_shape, num_labels, num_samples, one_hot):
     for dataset_id in [tuple_np, dict_np, tuple_torch, dict_torch, tensor_ds_torch]:
         ds = handler.load_dataset(dataset_id, columns=["key_a", "key_b"])
 
-        # check registered keys, shapes
-        output_keys = ds.columns
+        # check registered columns, shapes
+        output_columns = ds.columns
         output_shapes = ds.output_shapes
-        assert output_keys == ["key_a", "key_b"]
+        assert output_columns == ["key_a", "key_b"]
         assert output_shapes == [
            torch.Size(x_shape),
            torch.Size([num_labels] if one_hot else []),
        ]
-        # check item keys, shapes
+        # check item columns, shapes
         dummy_item = ds[0]
-        assert list(dummy_item.keys()) == output_keys
+        assert list(dummy_item.keys()) == output_columns
         assert list(map(lambda x: x.shape, dummy_item.values())) == output_shapes
 
 
@@ -271,29 +271,29 @@ def test_data_handler_full_pipeline(x_shape, num_samples, num_labels, one_hot):
     num_samples_b = len(dataset_b)
     assert num_samples == (num_samples_a + num_samples_b)
 
-    # assign feature, map, get feature
+    # assign column, map, get column
    def map_fn_a(item):
-        item["new_feature"] -= 3
+        item["new_column"] -= 3
         return item
 
     def map_fn_b(item):
-        item["new_feature"] = item["new_feature"] * 3 + 2
+        item["new_column"] = item["new_column"] * 3 + 2
         return item
 
-    dataset_a = assign_feature_value(dataset_a, "new_feature", 0)
+    dataset_a = assign_value_to_column(dataset_a, "new_column", 0)
     dataset_a = dataset_a.map(map_fn_a)
-    features_a = torch.Tensor(get_feature_from_ds(dataset_a, "new_feature"))
-    assert torch.all(features_a == torch.Tensor([-3] * num_samples_a))
+    columns_a = torch.Tensor(get_column_from_ds(dataset_a, "new_column"))
+    assert torch.all(columns_a == torch.Tensor([-3] * num_samples_a))
 
-    dataset_b = assign_feature_value(dataset_b, "new_feature", 1)
+    dataset_b = assign_value_to_column(dataset_b, "new_column", 1)
     dataset_b = dataset_b.map(map_fn_b)
-    features_b = torch.Tensor(get_feature_from_ds(dataset_b, "new_feature"))
-    assert torch.all(features_b == torch.Tensor([5] * num_samples_b))
+    columns_b = torch.Tensor(get_column_from_ds(dataset_b, "new_column"))
+    assert torch.all(columns_b == torch.Tensor([5] * num_samples_b))
 
     # concatenate two sub datasets
     dataset_c = handler.merge(dataset_a, dataset_b)
-    features_c = torch.Tensor(get_feature_from_ds(dataset_c, "new_feature"))
-    assert torch.all(features_c == torch.cat([features_a, features_b]))
+    columns_c = torch.Tensor(get_column_from_ds(dataset_c, "new_column"))
+    assert torch.all(columns_c == torch.cat([columns_a, columns_b]))
 
     # prepare dataloader
     loader = handler.prepare(dataset_c, 64, shuffle=True)
@@ -360,13 +360,13 @@ def test_split_by_class(in_labels, out_labels, one_hot, expected_output):
     len_inds = len(in_dataset)
     len_outds = len(out_dataset)
 
-    classes = get_feature_from_ds(dataset, "label")
+    classes = get_column_from_ds(dataset, "label")
     classes = np.unique(classes, axis=0)
 
-    classes_in = get_feature_from_ds(in_dataset, "label")
+    classes_in = get_column_from_ds(in_dataset, "label")
     classes_in = np.unique(classes_in, axis=0)
 
-    classes_out = get_feature_from_ds(out_dataset, "label")
+    classes_out = get_column_from_ds(out_dataset, "label")
     classes_out = np.unique(classes_out, axis=0)
 
     assert len_ds == expected_output[0]
