Project import generated by Copybara.

tf-transform-team · zoyahav · commit 6ca34b65e426 · 2018-06-27T16:45:36.000-04:00
PiperOrigin-RevId: 202361704
diff --git a/README.md b/README.md
@@ -13,7 +13,7 @@ of examples. `tf.Transform` extends these capabilities to support full-passes
 over the example data.
 
 The output of `tf.Transform` is exported as a
-[TensorFlow graph](http://tensorflow.org/programmers_guide/graphs) to use for training and serving.
+[TensorFlow graph](http://tensorflow.org/guide/graphs) to use for training and serving.
 Using the same graph for both training and serving can prevent skew since the
 same transformations are applied in both stages.
 
diff --git a/RELEASE.md b/RELEASE.md
@@ -32,12 +32,14 @@
   In this case it returns a vector mean computed over the non-missing values of
   the SparseTensor.
 * Update examples to use "core" TensorFlow estimator API (`tf.estimator`).
+* Depends on `protobuf>=3.6.0,<4`.
 
 ## Breaking changes
 * `apply_saved_transform` is removed.  See note on
   `partially_apply_saved_transform` in the `Deprecations` section.
 * No longer set `vocabulary_file` in `IntDomain` when using
   `tft.compute_and_apply_vocabulary` or `tft.apply_vocabulary`.
+* Requires pre-installed TensorFlow >=1.8,<2.
 
 ## Deprecations
 * The `expected_asset_file_contents` of
diff --git a/examples/census_example.py b/examples/census_example.py
@@ -300,21 +300,15 @@ def serving_input_fn():
   return serving_input_fn
 
 
-def train_and_evaluate(working_dir, num_train_instances=NUM_TRAIN_INSTANCES,
-                       num_test_instances=NUM_TEST_INSTANCES):
-  """Train the model on training data and evaluate on test data.
+def get_feature_columns(tf_transform_output):
+  """Returns the FeatureColumns for the model.
 
   Args:
-    working_dir: Directory to read transformed data and metadata from and to
-        write exported model to.
-    num_train_instances: Number of instances in train set
-    num_test_instances: Number of instances in test set
+    tf_transform_output: A `TFTransformOutput` object.
 
   Returns:
-    The results from the estimator's 'evaluate' method
+    A list of FeatureColumns.
   """
-  tf_transform_output = tft.TFTransformOutput(working_dir)
-
   # Wrap scalars as real valued columns.
   real_valued_columns = [tf.feature_column.numeric_column(key, shape=())
                          for key in NUMERIC_FEATURE_KEYS]
@@ -327,10 +321,28 @@ def train_and_evaluate(working_dir, num_train_instances=NUM_TRAIN_INSTANCES,
               vocab_filename=key))
       for key in CATEGORICAL_FEATURE_KEYS]
 
+  return real_valued_columns + one_hot_columns
+
+
+def train_and_evaluate(working_dir, num_train_instances=NUM_TRAIN_INSTANCES,
+                       num_test_instances=NUM_TEST_INSTANCES):
+  """Train the model on training data and evaluate on test data.
+
+  Args:
+    working_dir: Directory to read transformed data and metadata from and to
+        write exported model to.
+    num_train_instances: Number of instances in train set
+    num_test_instances: Number of instances in test set
+
+  Returns:
+    The results from the estimator's 'evaluate' method
+  """
+  tf_transform_output = tft.TFTransformOutput(working_dir)
+
   run_config = tf.estimator.RunConfig()
 
   estimator = tf.estimator.LinearClassifier(
-      feature_columns=real_valued_columns + one_hot_columns,
+      feature_columns=get_feature_columns(tf_transform_output),
       config=run_config)
 
   # Fit the model using the default optimizer.
diff --git a/examples/sentiment_example.py b/examples/sentiment_example.py
@@ -294,6 +294,28 @@ def serving_input_fn():
   return serving_input_fn
 
 
+def get_feature_columns(tf_transform_output):
+  """Returns the FeatureColumns for the model.
+
+  Args:
+    tf_transform_output: A `TFTransformOutput` object.
+
+  Returns:
+    A list of FeatureColumns.
+  """
+  del tf_transform_output  # unused
+  # Unrecognized tokens are represented by -1, but
+  # categorical_column_with_identity uses the mod operator to map integers
+  # to the range [0, num_buckets).  By choosing num_buckets=VOCAB_SIZE + 1, we
+  # represent unrecognized tokens as VOCAB_SIZE.
+  review_column = tf.feature_column.categorical_column_with_identity(
+      REVIEW_KEY, num_buckets=VOCAB_SIZE + 1)
+  weighted_reviews = tf.feature_column.weighted_categorical_column(
+      review_column, REVIEW_WEIGHT_KEY)
+
+  return [weighted_reviews]
+
+
 def train_and_evaluate(working_dir,
                        num_train_instances=NUM_TRAIN_INSTANCES,
                        num_test_instances=NUM_TEST_INSTANCES):
@@ -309,19 +331,10 @@ def train_and_evaluate(working_dir,
   """
   tf_transform_output = tft.TFTransformOutput(working_dir)
 
-  # Unrecognized tokens are represented by -1, but
-  # categorical_column_with_identity uses the mod operator to map integers
-  # to the range [0, bucket_size).  By choosing bucket_size=VOCAB_SIZE + 1, we
-  # represent unrecognized tokens as VOCAB_SIZE.
-  review_column = tf.feature_column.categorical_column_with_identity(
-      REVIEW_KEY, num_buckets=VOCAB_SIZE + 1)
-  weighted_reviews = tf.feature_column.weighted_categorical_column(
-      review_column, REVIEW_WEIGHT_KEY)
-
   run_config = tf.estimator.RunConfig()
 
   estimator = tf.estimator.LinearClassifier(
-      feature_columns=[weighted_reviews],
+      feature_columns=get_feature_columns(tf_transform_output),
       config=run_config)
 
   # Fit the model using the default optimizer.
diff --git a/setup.py b/setup.py
@@ -17,7 +17,7 @@
 from setuptools import setup
 
 # Tensorflow transform version.
-__version__ = '0.7.0dev'
+__version__ = '0.8.0dev'
 
 
 def _make_required_install_packages():
@@ -26,9 +26,8 @@ def _make_required_install_packages():
       'apache-beam[gcp]>=2.4,<3',
       'numpy>=1.10,<2',
 
-      # Protobuf libraries < 3.5.2 do not have 'cpp' implementation of protobufs
-      # for Windows and Mac.
-      'protobuf>=3.5.2,<4',
+      # TF now requires protobuf>=3.6.0.
+      'protobuf>=3.6.0,<4',
 
       'six>=1.9,<2',
 
diff --git a/tensorflow_transform/beam/impl.py b/tensorflow_transform/beam/impl.py
@@ -389,9 +389,9 @@ def process(self, batch, saved_model_dir):
 def _assert_tensorflow_version():
   # Fail with a clear error in case we are not using a compatible TF version.
   major, minor, _ = tf.__version__.split('.')
-  if int(major) != 1 or int(minor) < 6:
+  if int(major) != 1 or int(minor) < 8:
     raise RuntimeError(
-        'TensorFlow version >= 1.6, < 2 is required. Found (%s). Please '
+        'TensorFlow version >= 1.8, < 2 is required. Found (%s). Please '
         'install the latest 1.x version from '
         'https://github.com/tensorflow/tensorflow. ' % tf.__version__)
 
diff --git a/tensorflow_transform/saved/legacy_saved_transform_io.py b/tensorflow_transform/saved/legacy_saved_transform_io.py
diff --git a/tensorflow_transform/saved/saved_transform_io.py b/tensorflow_transform/saved/saved_transform_io.py
diff --git a/tensorflow_transform/saved/saved_transform_io_test.py b/tensorflow_transform/saved/saved_transform_io_test.py