@@ -294,6 +294,28 @@ def serving_input_fn():
294294 return serving_input_fn
295295
296296
def get_feature_columns(tf_transform_output):
  """Builds the list of FeatureColumns consumed by the estimator.

  Args:
    tf_transform_output: A `TFTransformOutput` object. Currently unused; kept
      in the signature so callers can pass the transform output uniformly.

  Returns:
    A list containing a single weighted categorical FeatureColumn over the
    review tokens.
  """
  del tf_transform_output  # unused
  # Out-of-vocabulary tokens arrive as -1, and
  # categorical_column_with_identity maps integers into [0, num_buckets)
  # with the mod operator. Allocating VOCAB_SIZE + 1 buckets therefore
  # sends every unrecognized token to bucket VOCAB_SIZE.
  tokens = tf.feature_column.categorical_column_with_identity(
      REVIEW_KEY, num_buckets=VOCAB_SIZE + 1)
  return [
      tf.feature_column.weighted_categorical_column(tokens, REVIEW_WEIGHT_KEY)
  ]
317+
318+
297319def train_and_evaluate (working_dir ,
298320 num_train_instances = NUM_TRAIN_INSTANCES ,
299321 num_test_instances = NUM_TEST_INSTANCES ):
@@ -309,19 +331,10 @@ def train_and_evaluate(working_dir,
309331 """
310332 tf_transform_output = tft .TFTransformOutput (working_dir )
311333
312- # Unrecognized tokens are represented by -1, but
313- # categorical_column_with_identity uses the mod operator to map integers
314- # to the range [0, bucket_size). By choosing bucket_size=VOCAB_SIZE + 1, we
315- # represent unrecognized tokens as VOCAB_SIZE.
316- review_column = tf .feature_column .categorical_column_with_identity (
317- REVIEW_KEY , num_buckets = VOCAB_SIZE + 1 )
318- weighted_reviews = tf .feature_column .weighted_categorical_column (
319- review_column , REVIEW_WEIGHT_KEY )
320-
321334 run_config = tf .estimator .RunConfig ()
322335
323336 estimator = tf .estimator .LinearClassifier (
324- feature_columns = [ weighted_reviews ] ,
337+ feature_columns = get_feature_columns ( tf_transform_output ) ,
325338 config = run_config )
326339
327340 # Fit the model using the default optimizer.
0 commit comments