
Commit

Standardize on "IDs" for docstrings across code base.
PiperOrigin-RevId: 339789430
j2i2 authored and copybara-github committed Oct 30, 2020
1 parent 8f1f060 commit 4d7f581
Showing 10 changed files with 41 additions and 41 deletions.
8 changes: 4 additions & 4 deletions README.md
@@ -157,7 +157,7 @@ Layers are basic building blocks of Trax models. You will learn all about them i

```python
class Embedding(base.Layer):
- """Trainable layer that maps discrete tokens/ids to vectors."""
+ """Trainable layer that maps discrete tokens/IDs to vectors."""

def __init__(self,
vocab_size,
@@ -167,7 +167,7 @@ class Embedding(base.Layer):
Args:
vocab_size: Size of the input vocabulary. The layer will assign a unique
- vector to each id in `range(vocab_size)`.
+ vector to each ID in `range(vocab_size)`.
d_feature: Dimensionality/depth of the output vectors.
kernel_initializer: Function that creates (random) initial vectors for
the embedding.
@@ -178,10 +178,10 @@ class Embedding(base.Layer):
self._kernel_initializer = kernel_initializer

def forward(self, x):
- """Returns embedding vectors corresponding to input token id's.
+ """Returns embedding vectors corresponding to input token IDs.
Args:
- x: Tensor of token id's.
+ x: Tensor of token IDs.
Returns:
Tensor of embedding vectors.
6 changes: 3 additions & 3 deletions docs/source/notebooks/trax_intro.ipynb
@@ -365,7 +365,7 @@
"\n",
"```\n",
"class Embedding(base.Layer):\n",
- " \"\"\"Trainable layer that maps discrete tokens/ids to vectors.\"\"\"\n",
+ " \"\"\"Trainable layer that maps discrete tokens/IDs to vectors.\"\"\"\n",
"\n",
" def __init__(self,\n",
" vocab_size,\n",
@@ -386,10 +386,10 @@
" self._kernel_initializer = kernel_initializer\n",
"\n",
" def forward(self, x):\n",
- " \"\"\"Returns embedding vectors corresponding to input token id's.\n",
+ " \"\"\"Returns embedding vectors corresponding to input token IDs.\n",
"\n",
" Args:\n",
- " x: Tensor of token id's.\n",
+ " x: Tensor of token IDs.\n",
"\n",
" Returns:\n",
" Tensor of embedding vectors.\n",
2 changes: 1 addition & 1 deletion pylintrc
@@ -25,7 +25,7 @@ files-output=no
# Tells whether to display a full report or only the messages.
reports=no

- # Disable the report(s) with the given id(s).
+ # Disable the report(s) with the given ID(s).
disable-report=R0001,R0002,R0003,R0004,R0101,R0102,R0201,R0202,R0220,R0401,R0402,R0701,R0801,R0901,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0921,R0922,R0923

# Error message template (continued on second line)
2 changes: 1 addition & 1 deletion trax/data/inputs.py
@@ -388,7 +388,7 @@ def add_loss_weights(generator, id_to_mask=None):
Args:
generator: Stream of tuples.
id_to_mask: If not None, int-valued id that represents padding, as opposed
- to true target id's.
+ to true target IDs.
Yields:
Examples from the augmented stream.
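For context on the `id_to_mask` argument touched in this hunk, a minimal usage sketch (assumptions: padding uses ID 0, examples are numpy arrays, and the toy stream below is hypothetical):

```python
import numpy as np

from trax.data import inputs


def toy_stream():
  # Hypothetical stream of (input, target) pairs, padded with ID 0.
  yield (np.array([3, 7, 2, 0, 0]), np.array([5, 2, 4, 0, 0]))


# Append loss weights that zero out positions whose target is the padding ID.
weighted = inputs.add_loss_weights(toy_stream(), id_to_mask=0)
for inp, tgt, weights in weighted:
  print(weights)  # expected: [1. 1. 1. 0. 0.]
```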
38 changes: 19 additions & 19 deletions trax/data/text_encoder.py
@@ -83,7 +83,7 @@ def to_unicode_utf8(s):


def strip_ids(ids, ids_to_strip):
- """Strip ids_to_strip from the end ids."""
+ """Strip ids_to_strip from the end IDs."""
ids = list(ids)
while ids and ids[-1] in ids_to_strip:
ids.pop()
@@ -101,9 +101,9 @@ def num_reserved_ids(self):
return self._num_reserved_ids

def encode(self, s):
- """Transform a human-readable string into a sequence of int ids.
+ """Transform a human-readable string into a sequence of int IDs.
- The ids should be in the range [num_reserved_ids, vocab_size). Ids [0,
+ The IDs should be in the range [num_reserved_ids, vocab_size). IDs [0,
num_reserved_ids) are reserved.
EOS is not appended.
@@ -117,9 +117,9 @@ def encode(self, s):
return [int(w) + self._num_reserved_ids for w in s.split()]

def decode(self, ids, strip_extraneous=False):
- """Transform a sequence of int ids into a human-readable string.
+ """Transform a sequence of int IDs into a human-readable string.
- EOS is not expected in ids.
+ EOS is not expected in IDs.
Args:
ids: list of integers to be converted.
@@ -134,9 +134,9 @@ def decode(self, ids, strip_extraneous=False):
return " ".join(self.decode_list(ids))

def decode_list(self, ids):
- """Transform a sequence of int ids into a their string versions.
+ """Transform a sequence of int IDs into a their string versions.
- This method supports transforming individual input/output ids to their
+ This method supports transforming individual input/output IDs to their
string versions so that sequence to/from text conversions can be visualized
in a human readable format.
@@ -472,7 +472,7 @@ def __init__(self, filename=None):
super(SubwordTextEncoder, self).__init__()

def encode(self, s):
- """Converts a native string to a list of subtoken ids.
+ """Converts a native string to a list of subtoken IDs.
Args:
s: a native string.
@@ -483,10 +483,10 @@ def encode(self, s):
tokenizer.encode(native_to_unicode(s)))

def encode_without_tokenizing(self, token_text):
- """Converts string to list of subtoken ids without calling tokenizer.
+ """Converts string to list of subtoken IDs without calling tokenizer.
This treats `token_text` as a single token and directly converts it
- to subtoken ids. This may be useful when the default tokenizer doesn't
+ to subtoken IDs. This may be useful when the default tokenizer doesn't
do what we want (e.g., when encoding text with tokens composed of lots of
nonalphanumeric characters). It is then up to the caller to make sure that
raw text is consistently converted into tokens. Only use this if you are
@@ -495,12 +495,12 @@ def encode_without_tokenizing(self, token_text):
Args:
token_text: A native string representation of a single token.
Returns:
- A list of subword token ids; i.e., integers in the range [0, vocab_size).
+ A list of subword token IDs; i.e., integers in the range [0, vocab_size).
"""
return self._tokens_to_subtoken_ids([native_to_unicode(token_text)])

def decode(self, ids, strip_extraneous=False):
- """Converts a sequence of subtoken ids to a native string.
+ """Converts a sequence of subtoken IDs to a native string.
Args:
ids: a list of integers in the range [0, vocab_size)
@@ -523,7 +523,7 @@ def vocab_size(self):
return len(self._all_subtoken_strings)

def _tokens_to_subtoken_ids(self, tokens):
- """Converts a list of tokens to a list of subtoken ids.
+ """Converts a list of tokens to a list of subtoken IDs.
Args:
tokens: a list of strings.
@@ -536,7 +536,7 @@ def _tokens_to_subtoken_ids(self, tokens):
return ret

def _token_to_subtoken_ids(self, token):
- """Converts token to a list of subtoken ids.
+ """Converts token to a list of subtoken IDs.
Args:
token: a string.
@@ -553,7 +553,7 @@ def _token_to_subtoken_ids(self, token):
return ret

def _subtoken_ids_to_tokens(self, subtokens):
- """Converts a list of subtoken ids to a list of tokens.
+ """Converts a list of subtoken IDs to a list of tokens.
Args:
subtokens: a list of integers in the range [0, vocab_size)
@@ -960,7 +960,7 @@ def encode(self, s):
return im.imread(s)

def decode(self, ids, strip_extraneous=False):
- """Transform a sequence of int ids into an image file.
+ """Transform a sequence of int IDs into an image file.
Args:
ids: list of integers to be converted.
@@ -970,7 +970,7 @@ def decode(self, ids, strip_extraneous=False):
Path to the temporary file where the image was saved.
Raises:
- ValueError: if the ids are not of the appropriate size.
+ ValueError: if the IDs are not of the appropriate size.
"""
del strip_extraneous
_, tmp_file_path = tempfile.mkstemp("_decode.png")
@@ -998,7 +998,7 @@ def decode(self, ids, strip_extraneous=False):
return tmp_file_path

def decode_list(self, ids):
- """Transform a sequence of int ids into an image file.
+ """Transform a sequence of int IDs into an image file.
Args:
ids: list of integers to be converted.
@@ -1038,7 +1038,7 @@ def decode(self, ids, strip_extraneous=False):
String having space separated float values.
Raises:
- ValueError: if the ids are not of the appropriate size.
+ ValueError: if the IDs are not of the appropriate size.
"""
del strip_extraneous
return " ".join([str(i) for i in ids])
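As a quick illustration of the encode/decode contract documented in these hunks, a minimal sketch using the pass-through base `TextEncoder` (assumption: the default of two reserved IDs, padding and EOS; `SubwordTextEncoder` follows the same contract but is built from a vocabulary file):

```python
from trax.data import text_encoder

# Pass-through encoder: token "i" maps to ID i + num_reserved_ids.
enc = text_encoder.TextEncoder()  # assumes the default num_reserved_ids == 2

ids = enc.encode("7 42 3")  # -> [9, 44, 5]; IDs 0 and 1 stay reserved
print(ids)
print(enc.decode(ids))      # -> "7 42 3"
```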
4 changes: 2 additions & 2 deletions trax/data/tf_inputs.py
@@ -377,7 +377,7 @@ def vocab_size(vocab_type='subword', vocab_file=None, vocab_dir=None,
This function can be used to set the size of the final layers of a model that
needs to predict symbols from a given vocabulary. More precisely, if this
function returns N then the last layer size should be set to at least N (it
- can be more). Note that this function does take reserved ids into account.
+ can be more). Note that this function does take reserved IDs into account.
Args:
vocab_type: Type of vocabulary, one of: 'subword', 'sentencepiece', 'char'.
@@ -386,7 +386,7 @@ def vocab_size(vocab_type='subword', vocab_file=None, vocab_dir=None,
n_reserved_ids: An int, offset added so 0, ..., n_reserved_ids-1 are unused.
Returns:
- An integer, the number of symbols used (including reserved ids).
+ An integer, the number of symbols used (including reserved IDs).
"""
vocab = _get_vocab(vocab_type, vocab_file, vocab_dir)
return vocab.vocab_size + n_reserved_ids
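A hedged sketch of how `vocab_size` is typically used to size a model's output layer (the vocabulary file and directory names below are hypothetical):

```python
from trax.data import tf_inputs

# Count the symbols in a subword vocabulary, including reserved IDs 0..99.
n_symbols = tf_inputs.vocab_size(vocab_type='subword',
                                 vocab_file='vocab.subword',
                                 vocab_dir='/tmp/vocab_dir',
                                 n_reserved_ids=100)

# The final logits layer of a model predicting from this vocabulary should
# have at least `n_symbols` outputs.
print(n_symbols)
```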
6 changes: 3 additions & 3 deletions trax/intro.ipynb
@@ -365,7 +365,7 @@
"\n",
"```\n",
"class Embedding(base.Layer):\n",
- " \"\"\"Trainable layer that maps discrete tokens/ids to vectors.\"\"\"\n",
+ " \"\"\"Trainable layer that maps discrete tokens/IDs to vectors.\"\"\"\n",
"\n",
" def __init__(self,\n",
" vocab_size,\n",
@@ -386,10 +386,10 @@
" self._kernel_initializer = kernel_initializer\n",
"\n",
" def forward(self, x):\n",
- " \"\"\"Returns embedding vectors corresponding to input token id's.\n",
+ " \"\"\"Returns embedding vectors corresponding to input token IDs.\n",
"\n",
" Args:\n",
- " x: Tensor of token id's.\n",
+ " x: Tensor of token IDs.\n",
"\n",
" Returns:\n",
" Tensor of embedding vectors.\n",
12 changes: 6 additions & 6 deletions trax/layers/core.py
@@ -119,7 +119,7 @@ def init_weights_and_state(self, input_signature):
# dimension at the end. This dimension size corresponds to embedding depth.
@assert_shape('...->...d')
class Embedding(base.Layer):
- """Trainable layer that maps discrete tokens/ids to vectors.
+ """Trainable layer that maps discrete tokens/IDs to vectors.
Embedding layers are commonly used to map discrete data, like words in NLP,
into vectors. Here is a canonical example::
@@ -142,9 +142,9 @@ def __init__(self,
distribution='uniform')):
"""Returns an embedding layer with given vocabulary size and vector size.
- The layer clips input values (token ids) to the range `[0, vocab_size)`.
- That is, negative token ids all clip to `0` before being mapped to a
- vector, and token ids with value `vocab_size` or greater all clip to
+ The layer clips input values (token IDs) to the range `[0, vocab_size)`.
+ That is, negative token IDs all clip to `0` before being mapped to a
+ vector, and token IDs with value `vocab_size` or greater all clip to
`vocab_size - 1` before being mapped to a vector.
Args:
Expand All @@ -161,10 +161,10 @@ def __init__(self,
self._kernel_initializer = kernel_initializer

def forward(self, x):
- """Returns embedding vectors corresponding to input token id's.
+ """Returns embedding vectors corresponding to input token IDs.
Args:
- x: Tensor of token id's.
+ x: Tensor of token IDs.
Returns:
Tensor of embedding vectors.
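To make the clipping behavior described in this docstring concrete, a minimal sketch (assuming the usual trax init-then-apply flow; sizes are illustrative):

```python
import numpy as np

from trax import layers as tl
from trax import shapes

layer = tl.Embedding(vocab_size=10, d_feature=3)

x = np.array([0, 2, 9, -1, 15])  # -1 and 15 are out-of-range token IDs
layer.init(shapes.signature(x))  # creates the (10, 3) embedding matrix
y = layer(x)

print(y.shape)                     # (5, 3)
print(np.array_equal(y[3], y[0]))  # True: -1 clips to 0
print(np.array_equal(y[4], y[2]))  # True: 15 clips to 9
```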
2 changes: 1 addition & 1 deletion trax/layers/core_test.py
@@ -122,7 +122,7 @@ def test_forward(self):
y = layer(x)
self.assertEqual(y.shape, (5, 3))

- # For distinct in-domain token ids, resulting vectors should be distinct.
+ # For distinct in-domain token IDs, resulting vectors should be distinct.
self.assertNotEqual(y[0].tolist(), y[1].tolist())
self.assertNotEqual(y[0].tolist(), y[2].tolist())
self.assertNotEqual(y[1].tolist(), y[2].tolist())
2 changes: 1 addition & 1 deletion trax/layers/research/efficient_attention.py
@@ -65,7 +65,7 @@ def hash_vecs(vecs, n_buckets_in, n_hashes, rng):
Returns:
A pair (buckets, n_buckets) where buckets is a tensor of shape
- [n_hashes, batch_size] of integers -- the hash bucket ids, and
+ [n_hashes, batch_size] of integers -- the hash bucket IDs, and
n_buckets is an int, the total number of hash buckets, equal to
the product of all items in n_buckets_in.
"""
