-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtfDatasetTools.py
36 lines (31 loc) · 1.03 KB
/
tfDatasetTools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import tensorflow_datasets as tfds
import tensorflow as tf
import numpy as np
def decodeZeroDimTensor(yourTensor):
    """
    Strips angle-bracketed spans from a single-element string tensor and
    returns the remaining text as a Python string.

    Every substring matching "<[^>]+>" (e.g. an HTML tag such as "<br />")
    is replaced with one space before the bytes are decoded as UTF-8.

    Args:
        yourTensor - (tf.Tensor) The input tensor you want to convert; it
            must hold exactly one element
    Returns:
        yourString - (string) The decoded text with the bracketed spans
            replaced by spaces
    """
    tagPattern = "<[^>]+>"
    cleaned = tf.strings.regex_replace(yourTensor, tagPattern, " ")
    # NOTE(review): np.array() on a tensor presumably requires eager
    # execution -- confirm against how callers iterate the dataset.
    asArray = np.array(cleaned)
    # reshape((1,)) raises unless there is exactly one element, so the
    # indexing below always yields that single bytes value.
    rawBytes = asArray.reshape((1,))[0]
    return rawBytes.decode("utf-8")
def convertTakeDataset(takeDataset):
    """
    Splits an iterable of (text, label) pairs into two parallel lists.

    Args:
        takeDataset - (TakeDataset) the TakeDataset that contains some number
            of examples from your initial dataset; each example must unpack
            into a (text, label) pair
    Returns:
        features - (list) the texts, each decoded to a string by
            decodeZeroDimTensor
        labels - (list) the labels, each converted to a Python int
    """
    features, labels = [], []
    # Walk the dataset exactly once so that features[i] and labels[i] are
    # guaranteed to come from the same example.
    for rawText, rawLabel in takeDataset:
        labelValue = int(np.array(rawLabel))
        labels.append(labelValue)
        features.append(decodeZeroDimTensor(rawText))
    return features, labels