
Commit

Standardize on "IDs" for docstrings across code base.
PiperOrigin-RevId: 339789430
j2i2 authored and copybara-github committed Oct 30, 2020
1 parent 8f1f060 commit 4d7f581
Showing 10 changed files with 41 additions and 41 deletions.
8 changes: 4 additions & 4 deletions README.md
@@ -157,7 +157,7 @@ Layers are basic building blocks of Trax models. You will learn all about them i

```python
class Embedding(base.Layer):
- """Trainable layer that maps discrete tokens/ids to vectors."""
+ """Trainable layer that maps discrete tokens/IDs to vectors."""

def __init__(self,
vocab_size,
@@ -167,7 +167,7 @@ class Embedding(base.Layer):
Args:
vocab_size: Size of the input vocabulary. The layer will assign a unique
- vector to each id in `range(vocab_size)`.
+ vector to each ID in `range(vocab_size)`.
d_feature: Dimensionality/depth of the output vectors.
kernel_initializer: Function that creates (random) initial vectors for
the embedding.
@@ -178,10 +178,10 @@ class Embedding(base.Layer):
self._kernel_initializer = kernel_initializer

def forward(self, x):
- """Returns embedding vectors corresponding to input token id's.
+ """Returns embedding vectors corresponding to input token IDs.
Args:
- x: Tensor of token id's.
+ x: Tensor of token IDs.
Returns:
Tensor of embedding vectors.
6 changes: 3 additions & 3 deletions docs/source/notebooks/trax_intro.ipynb
@@ -365,7 +365,7 @@
"\n",
"```\n",
"class Embedding(base.Layer):\n",
- " \"\"\"Trainable layer that maps discrete tokens/ids to vectors.\"\"\"\n",
+ " \"\"\"Trainable layer that maps discrete tokens/IDs to vectors.\"\"\"\n",
"\n",
" def __init__(self,\n",
" vocab_size,\n",
@@ -386,10 +386,10 @@
" self._kernel_initializer = kernel_initializer\n",
"\n",
" def forward(self, x):\n",
- " \"\"\"Returns embedding vectors corresponding to input token id's.\n",
+ " \"\"\"Returns embedding vectors corresponding to input token IDs.\n",
"\n",
" Args:\n",
- " x: Tensor of token id's.\n",
+ " x: Tensor of token IDs.\n",
"\n",
" Returns:\n",
" Tensor of embedding vectors.\n",
2 changes: 1 addition & 1 deletion pylintrc
@@ -25,7 +25,7 @@ files-output=no
# Tells whether to display a full report or only the messages.
reports=no

- # Disable the report(s) with the given id(s).
+ # Disable the report(s) with the given ID(s).
disable-report=R0001,R0002,R0003,R0004,R0101,R0102,R0201,R0202,R0220,R0401,R0402,R0701,R0801,R0901,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0921,R0922,R0923

# Error message template (continued on second line)
2 changes: 1 addition & 1 deletion trax/data/inputs.py
@@ -388,7 +388,7 @@ def add_loss_weights(generator, id_to_mask=None):
Args:
generator: Stream of tuples.
id_to_mask: If not None, int-valued id that represents padding, as opposed
- to true target id's.
+ to true target IDs.
Yields:
Examples from the augmented stream.
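For context on the `id_to_mask` argument touched in this hunk, a minimal usage sketch (assumptions: padding uses ID 0, examples are numpy arrays, and the toy stream below is hypothetical):

```python
import numpy as np

from trax.data import inputs


def toy_stream():
  # Hypothetical stream of (input, target) pairs, padded with ID 0.
  yield (np.array([3, 7, 2, 0, 0]), np.array([5, 2, 4, 0, 0]))


# Append loss weights that zero out positions whose target is the padding ID.
weighted = inputs.add_loss_weights(toy_stream(), id_to_mask=0)
for inp, tgt, weights in weighted:
  print(weights)  # expected: [1. 1. 1. 0. 0.]
```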
38 changes: 19 additions & 19 deletions trax/data/text_encoder.py
@@ -83,7 +83,7 @@ def to_unicode_utf8(s):


def strip_ids(ids, ids_to_strip):
- """Strip ids_to_strip from the end ids."""
+ """Strip ids_to_strip from the end IDs."""
ids = list(ids)
while ids and ids[-1] in ids_to_strip:
ids.pop()
@@ -101,9 +101,9 @@ def num_reserved_ids(self):
return self._num_reserved_ids

def encode(self, s):
- """Transform a human-readable string into a sequence of int ids.
+ """Transform a human-readable string into a sequence of int IDs.
- The ids should be in the range [num_reserved_ids, vocab_size). Ids [0,
+ The IDs should be in the range [num_reserved_ids, vocab_size). IDs [0,
num_reserved_ids) are reserved.
EOS is not appended.
@@ -117,9 +117,9 @@ def encode(self, s):
return [int(w) + self._num_reserved_ids for w in s.split()]

def decode(self, ids, strip_extraneous=False):
- """Transform a sequence of int ids into a human-readable string.
+ """Transform a sequence of int IDs into a human-readable string.
- EOS is not expected in ids.
+ EOS is not expected in IDs.
Args:
ids: list of integers to be converted.
@@ -134,9 +134,9 @@ def decode(self, ids, strip_extraneous=False):
return " ".join(self.decode_list(ids))

def decode_list(self, ids):
- """Transform a sequence of int ids into a their string versions.
+ """Transform a sequence of int IDs into a their string versions.
- This method supports transforming individual input/output ids to their
+ This method supports transforming individual input/output IDs to their
string versions so that sequence to/from text conversions can be visualized
in a human readable format.
@@ -472,7 +472,7 @@ def __init__(self, filename=None):
super(SubwordTextEncoder, self).__init__()

def encode(self, s):
- """Converts a native string to a list of subtoken ids.
+ """Converts a native string to a list of subtoken IDs.
Args:
s: a native string.
@@ -483,10 +483,10 @@ def encode(self, s):
tokenizer.encode(native_to_unicode(s)))

def encode_without_tokenizing(self, token_text):
- """Converts string to list of subtoken ids without calling tokenizer.
+ """Converts string to list of subtoken IDs without calling tokenizer.
This treats `token_text` as a single token and directly converts it
- to subtoken ids. This may be useful when the default tokenizer doesn't
+ to subtoken IDs. This may be useful when the default tokenizer doesn't
do what we want (e.g., when encoding text with tokens composed of lots of
nonalphanumeric characters). It is then up to the caller to make sure that
raw text is consistently converted into tokens. Only use this if you are
@@ -495,12 +495,12 @@ def encode_without_tokenizing(self, token_text):
Args:
token_text: A native string representation of a single token.
Returns:
- A list of subword token ids; i.e., integers in the range [0, vocab_size).
+ A list of subword token IDs; i.e., integers in the range [0, vocab_size).
"""
return self._tokens_to_subtoken_ids([native_to_unicode(token_text)])

def decode(self, ids, strip_extraneous=False):
- """Converts a sequence of subtoken ids to a native string.
+ """Converts a sequence of subtoken IDs to a native string.
Args:
ids: a list of integers in the range [0, vocab_size)
@@ -523,7 +523,7 @@ def vocab_size(self):
return len(self._all_subtoken_strings)

def _tokens_to_subtoken_ids(self, tokens):
- """Converts a list of tokens to a list of subtoken ids.
+ """Converts a list of tokens to a list of subtoken IDs.
Args:
tokens: a list of strings.
@@ -536,7 +536,7 @@ def _tokens_to_subtoken_ids(self, tokens):
return ret

def _token_to_subtoken_ids(self, token):
- """Converts token to a list of subtoken ids.
+ """Converts token to a list of subtoken IDs.
Args:
token: a string.
@@ -553,7 +553,7 @@ def _token_to_subtoken_ids(self, token):
return ret

def _subtoken_ids_to_tokens(self, subtokens):
- """Converts a list of subtoken ids to a list of tokens.
+ """Converts a list of subtoken IDs to a list of tokens.
Args:
subtokens: a list of integers in the range [0, vocab_size)
@@ -960,7 +960,7 @@ def encode(self, s):
return im.imread(s)

def decode(self, ids, strip_extraneous=False):
- """Transform a sequence of int ids into an image file.
+ """Transform a sequence of int IDs into an image file.
Args:
ids: list of integers to be converted.
@@ -970,7 +970,7 @@ def decode(self, ids, strip_extraneous=False):
Path to the temporary file where the image was saved.
Raises:
- ValueError: if the ids are not of the appropriate size.
+ ValueError: if the IDs are not of the appropriate size.
"""
del strip_extraneous
_, tmp_file_path = tempfile.mkstemp("_decode.png")
@@ -998,7 +998,7 @@ def decode(self, ids, strip_extraneous=False):
return tmp_file_path

def decode_list(self, ids):
- """Transform a sequence of int ids into an image file.
+ """Transform a sequence of int IDs into an image file.
Args:
ids: list of integers to be converted.
@@ -1038,7 +1038,7 @@ def decode(self, ids, strip_extraneous=False):
String having space separated float values.
Raises:
- ValueError: if the ids are not of the appropriate size.
+ ValueError: if the IDs are not of the appropriate size.
"""
del strip_extraneous
return " ".join([str(i) for i in ids])
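As a quick illustration of the encode/decode contract documented in these hunks, a minimal sketch using the pass-through base `TextEncoder` (assumption: the default of two reserved IDs, padding and EOS; `SubwordTextEncoder` follows the same contract but is built from a vocabulary file):

```python
from trax.data import text_encoder

# Pass-through encoder: token "i" maps to ID i + num_reserved_ids.
enc = text_encoder.TextEncoder()  # assumes the default num_reserved_ids == 2

ids = enc.encode("7 42 3")  # -> [9, 44, 5]; IDs 0 and 1 stay reserved
print(ids)
print(enc.decode(ids))      # -> "7 42 3"
```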
4 changes: 2 additions & 2 deletions trax/data/tf_inputs.py
@@ -377,7 +377,7 @@ def vocab_size(vocab_type='subword', vocab_file=None, vocab_dir=None,
This function can be used to set the size of the final layers of a model that
needs to predict symbols from a given vocabulary. More precisely, if this
function returns N then the last layer size should be set to at least N (it
- can be more). Note that this function does take reserved ids into account.
+ can be more). Note that this function does take reserved IDs into account.
Args:
vocab_type: Type of vocabulary, one of: 'subword', 'sentencepiece', 'char'.
@@ -386,7 +386,7 @@ def vocab_size(vocab_type='subword', vocab_file=None, vocab_dir=None,
n_reserved_ids: An int, offset added so 0, ..., n_reserved_ids-1 are unused.
Returns:
- An integer, the number of symbols used (including reserved ids).
+ An integer, the number of symbols used (including reserved IDs).
"""
vocab = _get_vocab(vocab_type, vocab_file, vocab_dir)
return vocab.vocab_size + n_reserved_ids
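A hedged sketch of how `vocab_size` is typically used to size a model's output layer (the vocabulary file and directory names below are hypothetical):

```python
from trax.data import tf_inputs

# Count the symbols in a subword vocabulary, including reserved IDs 0..99.
n_symbols = tf_inputs.vocab_size(vocab_type='subword',
                                 vocab_file='vocab.subword',
                                 vocab_dir='/tmp/vocab_dir',
                                 n_reserved_ids=100)

# The final logits layer of a model predicting from this vocabulary should
# have at least `n_symbols` outputs.
print(n_symbols)
```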
6 changes: 3 additions & 3 deletions trax/intro.ipynb
@@ -365,7 +365,7 @@
"\n",
"```\n",
"class Embedding(base.Layer):\n",
- " \"\"\"Trainable layer that maps discrete tokens/ids to vectors.\"\"\"\n",
+ " \"\"\"Trainable layer that maps discrete tokens/IDs to vectors.\"\"\"\n",
"\n",
" def __init__(self,\n",
" vocab_size,\n",
@@ -386,10 +386,10 @@
" self._kernel_initializer = kernel_initializer\n",
"\n",
" def forward(self, x):\n",
- " \"\"\"Returns embedding vectors corresponding to input token id's.\n",
+ " \"\"\"Returns embedding vectors corresponding to input token IDs.\n",
"\n",
" Args:\n",
- " x: Tensor of token id's.\n",
+ " x: Tensor of token IDs.\n",
"\n",
" Returns:\n",
" Tensor of embedding vectors.\n",
12 changes: 6 additions & 6 deletions trax/layers/core.py
@@ -119,7 +119,7 @@ def init_weights_and_state(self, input_signature):
# dimension at the end. This dimension size corresponds to embedding depth.
@assert_shape('...->...d')
class Embedding(base.Layer):
- """Trainable layer that maps discrete tokens/ids to vectors.
+ """Trainable layer that maps discrete tokens/IDs to vectors.
Embedding layers are commonly used to map discrete data, like words in NLP,
into vectors. Here is a canonical example::
@@ -142,9 +142,9 @@ def __init__(self,
distribution='uniform')):
"""Returns an embedding layer with given vocabulary size and vector size.
- The layer clips input values (token ids) to the range `[0, vocab_size)`.
- That is, negative token ids all clip to `0` before being mapped to a
- vector, and token ids with value `vocab_size` or greater all clip to
+ The layer clips input values (token IDs) to the range `[0, vocab_size)`.
+ That is, negative token IDs all clip to `0` before being mapped to a
+ vector, and token IDs with value `vocab_size` or greater all clip to
`vocab_size - 1` before being mapped to a vector.
Args:
Expand All @@ -161,10 +161,10 @@ def __init__(self,
self._kernel_initializer = kernel_initializer

def forward(self, x):
- """Returns embedding vectors corresponding to input token id's.
+ """Returns embedding vectors corresponding to input token IDs.
Args:
- x: Tensor of token id's.
+ x: Tensor of token IDs.
Returns:
Tensor of embedding vectors.
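To make the clipping behavior described in this docstring concrete, a minimal sketch (assuming the usual trax init-then-apply flow; sizes are illustrative):

```python
import numpy as np

from trax import layers as tl
from trax import shapes

layer = tl.Embedding(vocab_size=10, d_feature=3)

x = np.array([0, 2, 9, -1, 15])  # -1 and 15 are out-of-range token IDs
layer.init(shapes.signature(x))  # creates the (10, 3) embedding matrix
y = layer(x)

print(y.shape)                     # (5, 3)
print(np.array_equal(y[3], y[0]))  # True: -1 clips to 0
print(np.array_equal(y[4], y[2]))  # True: 15 clips to 9
```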
2 changes: 1 addition & 1 deletion trax/layers/core_test.py
@@ -122,7 +122,7 @@ def test_forward(self):
y = layer(x)
self.assertEqual(y.shape, (5, 3))

- # For distinct in-domain token ids, resulting vectors should be distinct.
+ # For distinct in-domain token IDs, resulting vectors should be distinct.
self.assertNotEqual(y[0].tolist(), y[1].tolist())
self.assertNotEqual(y[0].tolist(), y[2].tolist())
self.assertNotEqual(y[1].tolist(), y[2].tolist())
2 changes: 1 addition & 1 deletion trax/layers/research/efficient_attention.py
@@ -65,7 +65,7 @@ def hash_vecs(vecs, n_buckets_in, n_hashes, rng):
Returns:
A pair (buckets, n_buckets) where buckets is a tensor of shape
- [n_hashes, batch_size] of integers -- the hash bucket ids, and
+ [n_hashes, batch_size] of integers -- the hash bucket IDs, and
n_buckets is an int, the total number of hash buckets, equal to
the product of all items in n_buckets_in.
"""
