Skip to content

Commit 6ca34b6

Browse files
tf-transform-team authored and zoyahav committed
Project import generated by Copybara.
PiperOrigin-RevId: 202361704
1 parent 3014617 commit 6ca34b6

File tree

9 files changed

+485
-28
lines changed

9 files changed

+485
-28
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ of examples. `tf.Transform` extends these capabilities to support full-passes
1313
over the example data.
1414

1515
The output of `tf.Transform` is exported as a
16-
[TensorFlow graph](http://tensorflow.org/programmers_guide/graphs) to use for training and serving.
16+
[TensorFlow graph](http://tensorflow.org/guide/graphs) to use for training and serving.
1717
Using the same graph for both training and serving can prevent skew since the
1818
same transformations are applied in both stages.
1919

RELEASE.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,14 @@
3232
In this case it returns a vector mean computed over the non-missing values of
3333
the SparseTensor.
3434
* Update examples to use "core" TensorFlow estimator API (`tf.estimator`).
35+
* Depends on `protobuf>=3.6.0,<4`.
3536

3637
## Breaking changes
3738
* `apply_saved_transform` is removed. See note on
3839
`partially_apply_saved_transform` in the `Deprecations` section.
3940
* No longer set `vocabulary_file` in `IntDomain` when using
4041
`tft.compute_and_apply_vocabulary` or `tft.apply_vocabulary`.
42+
* Requires pre-installed TensorFlow >=1.8,<2.
4143

4244
## Deprecations
4345
* The `expected_asset_file_contents` of

examples/census_example.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -300,21 +300,15 @@ def serving_input_fn():
300300
return serving_input_fn
301301

302302

303-
def train_and_evaluate(working_dir, num_train_instances=NUM_TRAIN_INSTANCES,
304-
num_test_instances=NUM_TEST_INSTANCES):
305-
"""Train the model on training data and evaluate on test data.
303+
def get_feature_columns(tf_transform_output):
304+
"""Returns the FeatureColumns for the model.
306305
307306
Args:
308-
working_dir: Directory to read transformed data and metadata from and to
309-
write exported model to.
310-
num_train_instances: Number of instances in train set
311-
num_test_instances: Number of instances in test set
307+
tf_transform_output: A `TFTransformOutput` object.
312308
313309
Returns:
314-
The results from the estimator's 'evaluate' method
310+
A list of FeatureColumns.
315311
"""
316-
tf_transform_output = tft.TFTransformOutput(working_dir)
317-
318312
# Wrap scalars as real valued columns.
319313
real_valued_columns = [tf.feature_column.numeric_column(key, shape=())
320314
for key in NUMERIC_FEATURE_KEYS]
@@ -327,10 +321,28 @@ def train_and_evaluate(working_dir, num_train_instances=NUM_TRAIN_INSTANCES,
327321
vocab_filename=key))
328322
for key in CATEGORICAL_FEATURE_KEYS]
329323

324+
return real_valued_columns + one_hot_columns
325+
326+
327+
def train_and_evaluate(working_dir, num_train_instances=NUM_TRAIN_INSTANCES,
328+
num_test_instances=NUM_TEST_INSTANCES):
329+
"""Train the model on training data and evaluate on test data.
330+
331+
Args:
332+
working_dir: Directory to read transformed data and metadata from and to
333+
write exported model to.
334+
num_train_instances: Number of instances in train set
335+
num_test_instances: Number of instances in test set
336+
337+
Returns:
338+
The results from the estimator's 'evaluate' method
339+
"""
340+
tf_transform_output = tft.TFTransformOutput(working_dir)
341+
330342
run_config = tf.estimator.RunConfig()
331343

332344
estimator = tf.estimator.LinearClassifier(
333-
feature_columns=real_valued_columns + one_hot_columns,
345+
feature_columns=get_feature_columns(tf_transform_output),
334346
config=run_config)
335347

336348
# Fit the model using the default optimizer.

examples/sentiment_example.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,28 @@ def serving_input_fn():
294294
return serving_input_fn
295295

296296

297+
def get_feature_columns(tf_transform_output):
298+
"""Returns the FeatureColumns for the model.
299+
300+
Args:
301+
tf_transform_output: A `TFTransformOutput` object.
302+
303+
Returns:
304+
A list of FeatureColumns.
305+
"""
306+
del tf_transform_output # unused
307+
# Unrecognized tokens are represented by -1, but
308+
# categorical_column_with_identity uses the mod operator to map integers
309+
# to the range [0, bucket_size). By choosing bucket_size=VOCAB_SIZE + 1, we
310+
# represent unrecognized tokens as VOCAB_SIZE.
311+
review_column = tf.feature_column.categorical_column_with_identity(
312+
REVIEW_KEY, num_buckets=VOCAB_SIZE + 1)
313+
weighted_reviews = tf.feature_column.weighted_categorical_column(
314+
review_column, REVIEW_WEIGHT_KEY)
315+
316+
return [weighted_reviews]
317+
318+
297319
def train_and_evaluate(working_dir,
298320
num_train_instances=NUM_TRAIN_INSTANCES,
299321
num_test_instances=NUM_TEST_INSTANCES):
@@ -309,19 +331,10 @@ def train_and_evaluate(working_dir,
309331
"""
310332
tf_transform_output = tft.TFTransformOutput(working_dir)
311333

312-
# Unrecognized tokens are represented by -1, but
313-
# categorical_column_with_identity uses the mod operator to map integers
314-
# to the range [0, bucket_size). By choosing bucket_size=VOCAB_SIZE + 1, we
315-
# represent unrecognized tokens as VOCAB_SIZE.
316-
review_column = tf.feature_column.categorical_column_with_identity(
317-
REVIEW_KEY, num_buckets=VOCAB_SIZE + 1)
318-
weighted_reviews = tf.feature_column.weighted_categorical_column(
319-
review_column, REVIEW_WEIGHT_KEY)
320-
321334
run_config = tf.estimator.RunConfig()
322335

323336
estimator = tf.estimator.LinearClassifier(
324-
feature_columns=[weighted_reviews],
337+
feature_columns=get_feature_columns(tf_transform_output),
325338
config=run_config)
326339

327340
# Fit the model using the default optimizer.

setup.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from setuptools import setup
1818

1919
# Tensorflow transform version.
20-
__version__ = '0.7.0dev'
20+
__version__ = '0.8.0dev'
2121

2222

2323
def _make_required_install_packages():
@@ -26,9 +26,8 @@ def _make_required_install_packages():
2626
'apache-beam[gcp]>=2.4,<3',
2727
'numpy>=1.10,<2',
2828

29-
# Protobuf libraries < 3.5.2 do not have 'cpp' implementation of protobufs
30-
# for Windows and Mac.
31-
'protobuf>=3.5.2,<4',
29+
# TF now requires protobuf>=3.6.0.
30+
'protobuf>=3.6.0,<4',
3231

3332
'six>=1.9,<2',
3433

tensorflow_transform/beam/impl.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,9 +389,9 @@ def process(self, batch, saved_model_dir):
389389
def _assert_tensorflow_version():
390390
# Fail with a clear error in case we are not using a compatible TF version.
391391
major, minor, _ = tf.__version__.split('.')
392-
if int(major) != 1 or int(minor) < 6:
392+
if int(major) != 1 or int(minor) < 8:
393393
raise RuntimeError(
394-
'TensorFlow version >= 1.6, < 2 is required. Found (%s). Please '
394+
'TensorFlow version >= 1.8, < 2 is required. Found (%s). Please '
395395
'install the latest 1.x version from '
396396
'https://github.com/tensorflow/tensorflow. ' % tf.__version__)
397397

0 commit comments

Comments
 (0)