Skip to content

Commit 66465ee

Browse files
All models under modelzoo/features/EmbeddingVariable are ready for supervisor's check
1 parent 0689b65 commit 66465ee

File tree

4 files changed

+117
-111
lines changed

4 files changed

+117
-111
lines changed

modelzoo/features/EmbeddingVariable/DIEN/generate_data.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919
batch_size=1
2020
maxlen=100
2121

22+
data_location='data'
23+
test_file = os.path.join(data_location, "local_test_splitByUser")
24+
uid_voc = os.path.join(data_location, "uid_voc.pkl")
25+
mid_voc = os.path.join(data_location, "mid_voc.pkl")
26+
cat_voc = os.path.join(data_location, "cat_voc.pkl")
27+
2228
def prepare_data(input, target, maxlen=None, return_neg=False):
2329
# x: a list of sentences
2430
lengths_x = [len(s[4]) for s in input]
@@ -87,11 +93,6 @@ def prepare_data(input, target, maxlen=None, return_neg=False):
8793
target), numpy.array(lengths_x)
8894

8995

90-
data_location='data'
91-
test_file = os.path.join(data_location, "local_test_splitByUser")
92-
uid_voc = os.path.join(data_location, "uid_voc.pkl")
93-
mid_voc = os.path.join(data_location, "mid_voc.pkl")
94-
cat_voc = os.path.join(data_location, "cat_voc.pkl")
9596

9697
test_data = DataIterator(test_file,
9798
uid_voc,

modelzoo/features/EmbeddingVariable/DIEN/prepare_savedmodel.py

Lines changed: 32 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -35,73 +35,40 @@ def load_dict(filename):
3535
return pkl.load(f)
3636

3737

38-
def main(bf16,n_uid,n_mid,n_cat):
38+
def main(n_uid,n_mid,n_cat):
3939

4040
with tf.Session() as sess1:
4141

42-
if bf16:
43-
model = Model_DIN_V2_Gru_Vec_attGru_Neg_bf16(
44-
n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE,
45-
ATTENTION_SIZE)
46-
47-
# Initialize saver
48-
folder_dir = args.checkpoint
49-
saver = tf.train.Saver()
50-
51-
sess1.run(tf.global_variables_initializer())
52-
sess1.run(tf.local_variables_initializer())
53-
# Restore from checkpoint
54-
55-
saver.restore(sess1,tf.train.latest_checkpoint(folder_dir))
56-
57-
# Get save directory
58-
dir = "./savedmodels"
59-
os.makedirs(dir,exist_ok=True)
60-
cc_time = int(time.time())
61-
saved_path = os.path.join(dir,str(cc_time))
62-
os.mkdir(saved_path)
63-
64-
65-
tf.saved_model.simple_save(
66-
sess1,
67-
saved_path,
68-
inputs = {"mid_his_batch_ph":model.mid_his_batch_ph,"cat_his_batch_ph":model.cat_his_batch_ph,
69-
"uid_batch_ph":model.uid_batch_ph,"mid_batch_ph":model.mid_batch_ph,"cat_batch_ph":model.cat_batch_ph,
70-
"mask":model.mask,"seq_len_ph":model.seq_len_ph,"target_ph":model.target_ph},
71-
outputs = {"predict":model.y_hat}
72-
)
73-
74-
else:
75-
model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat,
76-
EMBEDDING_DIM, HIDDEN_SIZE,
77-
ATTENTION_SIZE)
78-
79-
# Initialize saver
80-
folder_dir = args.checkpoint
81-
saver = tf.train.Saver()
82-
83-
sess1.run(tf.global_variables_initializer())
84-
sess1.run(tf.local_variables_initializer())
85-
# Restore from checkpoint
86-
saver.restore(sess1,tf.train.latest_checkpoint(folder_dir))
87-
88-
# Get save directory
89-
dir = "./savedmodels"
90-
os.makedirs(dir,exist_ok=True)
91-
cc_time = int(time.time())
92-
saved_path = os.path.join(dir,str(cc_time))
93-
os.mkdir(saved_path)
94-
95-
96-
tf.saved_model.simple_save(
97-
sess1,
98-
saved_path,
99-
inputs = {"mid_his_batch_ph":model.mid_his_batch_ph,"cat_his_batch_ph":model.cat_his_batch_ph,
100-
"uid_batch_ph":model.uid_batch_ph,"mid_batch_ph":model.mid_batch_ph,"cat_batch_ph":model.cat_batch_ph,
101-
"mask":model.mask,"seq_len_ph":model.seq_len_ph,"target_ph":model.target_ph,"noclk_mid_batch_ph":model.noclk_mid_batch_ph,
102-
"noclk_cat_batch_ph":model.noclk_cat_batch_ph},
103-
outputs = {"predict":model.y_hat}
104-
)
42+
43+
model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat,
44+
EMBEDDING_DIM, HIDDEN_SIZE,
45+
ATTENTION_SIZE)
46+
47+
# Initialize saver
48+
folder_dir = args.checkpoint
49+
saver = tf.train.Saver()
50+
51+
sess1.run(tf.global_variables_initializer())
52+
sess1.run(tf.local_variables_initializer())
53+
# Restore from checkpoint
54+
saver.restore(sess1,tf.train.latest_checkpoint(folder_dir))
55+
56+
# Get save directory
57+
dir = "./savedmodels"
58+
os.makedirs(dir,exist_ok=True)
59+
cc_time = int(time.time())
60+
saved_path = os.path.join(dir,str(cc_time))
61+
os.mkdir(saved_path)
62+
63+
64+
tf.saved_model.simple_save(
65+
sess1,
66+
saved_path,
67+
inputs = {"Inputs/mid_his_batch_ph:0":model.mid_his_batch_ph,"Inputs/cat_his_batch_ph:0":model.cat_his_batch_ph,
68+
"Inputs/uid_batch_ph:0":model.uid_batch_ph,"Inputs/mid_batch_ph:0":model.mid_batch_ph,"Inputs/cat_batch_ph:0":model.cat_batch_ph,
69+
"Inputs/mask:0":model.mask,"Inputs/seq_len_ph:0":model.seq_len_ph,"Inputs/target_ph:0":model.target_ph},
70+
outputs = {"top_full_connect/add_2:0":model.y_hat}
71+
)
10572

10673

10774

@@ -130,6 +97,6 @@ def main(bf16,n_uid,n_mid,n_cat):
13097
cat_d = load_dict(cat_voc)
13198

13299

133-
main(args.bf16,len(uid_d),len(mid_d),len(cat_d))
100+
main(len(uid_d),len(mid_d),len(cat_d))
134101

135102

modelzoo/features/EmbeddingVariable/DIEN/start_serving_dien.cc

Lines changed: 73 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -19,64 +19,93 @@ static const char* model_config = "{ \
1919
\"update_thread_num\": 2, \
2020
\"model_store_type\": \"local\", \
2121
\"checkpoint_dir\": \"/home/deeprec/DeepRec/modelzoo/features/EmbeddingVariable/DIEN/result/\", \
22-
\"savedmodel_dir\": \"/home/deeprec/DeepRec/modelzoo/features/EmbeddingVariable/DIEN/savedmodels/1658648285/\" \
22+
\"savedmodel_dir\": \"/home/deeprec/DeepRec/modelzoo/features/EmbeddingVariable/DIEN/savedmodels/1658740712/\" \
2323
} ";
2424

2525

26-
::tensorflow::eas::ArrayProto get_proto_float(std::vector<float>& cur_vector){
26+
::tensorflow::eas::ArrayProto get_proto_float_1(std::vector<float>& cur_vector){
2727
::tensorflow::eas::ArrayShape array_shape;
2828
::tensorflow::eas::ArrayDataType dtype_f =
2929
::tensorflow::eas::ArrayDataType::DT_FLOAT;
30-
int num_elem = cur_vector.size();
31-
32-
if (num_elem == 1){
30+
3331
array_shape.add_dim(1);
34-
// array_shape.add_dim(1);
3532
::tensorflow::eas::ArrayProto input;
36-
input.add_float_val(cur_vector.back());
33+
input.add_float_val((float)cur_vector.back());
3734
input.set_dtype(dtype_f);
3835
*(input.mutable_array_shape()) = array_shape;
3936
return input;
40-
}
37+
38+
}
39+
40+
::tensorflow::eas::ArrayProto get_proto_float_2(std::vector<float>& cur_vector){
41+
::tensorflow::eas::ArrayShape array_shape;
42+
::tensorflow::eas::ArrayDataType dtype_f =
43+
::tensorflow::eas::ArrayDataType::DT_FLOAT;
44+
int num_elem = (int)cur_vector.size();
4145

4246
array_shape.add_dim(1);
43-
array_shape.add_dim(cur_vector.size());
47+
if((int)cur_vector.size() < 0){
48+
49+
array_shape.add_dim(1);
50+
::tensorflow::eas::ArrayProto input;
51+
input.add_float_val(1.0);
52+
input.set_dtype(dtype_f);
53+
*(input.mutable_array_shape()) = array_shape;
54+
55+
return input;
56+
}
57+
array_shape.add_dim((int)cur_vector.size());
58+
4459
::tensorflow::eas::ArrayProto input;
45-
for(unsigned int tt = 0; tt < cur_vector.size(); ++tt)
46-
{
47-
input.add_float_val(cur_vector[tt]);
48-
}
60+
for(int tt = 0; tt < (int)cur_vector.size(); ++tt)
61+
{
62+
input.add_float_val((float)cur_vector[tt]);
63+
}
4964
input.set_dtype(dtype_f);
5065
*(input.mutable_array_shape()) = array_shape;
5166

5267
return input;
5368

5469
}
5570

56-
::tensorflow::eas::ArrayProto get_proto_int(std::vector<int>& cur_vector){
71+
::tensorflow::eas::ArrayProto get_proto_int_1(std::vector<int>& cur_vector){
5772
::tensorflow::eas::ArrayShape array_shape;
5873
::tensorflow::eas::ArrayDataType dtype_i =
5974
::tensorflow::eas::ArrayDataType::DT_INT32;
60-
int num_elem = cur_vector.size();
6175

62-
if (num_elem == 1){
76+
array_shape.add_dim(1);
77+
::tensorflow::eas::ArrayProto input;
78+
input.add_int_val((int)cur_vector.back());
79+
input.set_dtype(dtype_i);
80+
*(input.mutable_array_shape()) = array_shape;
81+
return input;
82+
83+
}
84+
85+
::tensorflow::eas::ArrayProto get_proto_int_2(std::vector<int>& cur_vector){
86+
::tensorflow::eas::ArrayShape array_shape;
87+
::tensorflow::eas::ArrayDataType dtype_f =
88+
::tensorflow::eas::ArrayDataType::DT_INT32;
89+
int num_elem = (int)cur_vector.size();
90+
91+
array_shape.add_dim(1);
92+
if((int)cur_vector.size() < 0){
93+
6394
array_shape.add_dim(1);
64-
// array_shape.add_dim(1);
6595
::tensorflow::eas::ArrayProto input;
66-
input.add_int_val(cur_vector.back());
67-
input.set_dtype(dtype_i);
68-
*(input.mutable_array_shape()) = array_shape;
69-
return input;
70-
}
96+
input.add_int_val(1);
97+
input.set_dtype(dtype_f);
98+
*(input.mutable_array_shape()) = array_shape;
7199

72-
array_shape.add_dim(1);
73-
array_shape.add_dim(cur_vector.size());
100+
return input;
101+
}
102+
array_shape.add_dim((int)cur_vector.size());
74103
::tensorflow::eas::ArrayProto input;
75-
for(unsigned int tt = 0; tt < cur_vector.size(); ++tt)
76-
{
77-
input.add_int_val(cur_vector[tt]);
78-
}
79-
input.set_dtype(dtype_i);
104+
for(int tt = 0; tt < (int)cur_vector.size(); ++tt)
105+
{
106+
input.add_int_val((int)cur_vector[tt]);
107+
}
108+
input.set_dtype(dtype_f);
80109
*(input.mutable_array_shape()) = array_shape;
81110

82111
return input;
@@ -96,14 +125,14 @@ int main(int argc, char** argv) {
96125
}
97126

98127
// // ---------------------------------------prepare serving data from file--------------------------------------
99-
128+
100129
FILE *fp = nullptr;
101130
char *line, *record;
102131
char buffer2[1024];
103132
char delim[] = ",";
104133
char next_line[] = "k";
105134
int cur_type = 0;
106-
135+
107136
// vector variables
108137
std::vector<int> cur_uids;
109138
std::vector<int> cur_mids;
@@ -137,7 +166,7 @@ int main(int argc, char** argv) {
137166
cur_type = 0;
138167

139168
// free memory and clear ptrs
140-
for(unsigned int i = 0; i < temp_ptrs.size(); ++i){free(temp_ptrs[i]);}
169+
for(int i = 0; i < (int)temp_ptrs.size(); ++i){free(temp_ptrs[i]);}
141170
temp_ptrs.clear();
142171

143172
// traverse current line
@@ -215,16 +244,19 @@ int main(int argc, char** argv) {
215244

216245
}
217246

247+
// // ---------------------------------------prepare request--------------------------------------
248+
218249

219250
// get all inputs
220-
::tensorflow::eas::ArrayProto proto_uids = get_proto_int(cur_uids);
221-
::tensorflow::eas::ArrayProto proto_mids = get_proto_int(cur_mids);
222-
::tensorflow::eas::ArrayProto proto_cats = get_proto_int(cur_cats);
223-
::tensorflow::eas::ArrayProto proto_mid_his = get_proto_int(cur_mid_his);
224-
::tensorflow::eas::ArrayProto proto_cat_his = get_proto_int(cur_cat_his);
225-
::tensorflow::eas::ArrayProto proto_mid_mask= get_proto_float(cur_mid_mask); //float
226-
::tensorflow::eas::ArrayProto proto_target = get_proto_float(cur_target); //float
227-
::tensorflow::eas::ArrayProto proto_sl = get_proto_int(cur_sl);
251+
::tensorflow::eas::ArrayProto proto_uids = get_proto_int_1(cur_uids); // -1
252+
::tensorflow::eas::ArrayProto proto_mids = get_proto_int_1(cur_mids); // -1
253+
::tensorflow::eas::ArrayProto proto_cats = get_proto_int_1(cur_cats); // -1
254+
::tensorflow::eas::ArrayProto proto_mid_his = get_proto_int_2(cur_mid_his); // -1 -1
255+
::tensorflow::eas::ArrayProto proto_cat_his = get_proto_int_2(cur_cat_his); // -1 -1
256+
::tensorflow::eas::ArrayProto proto_mid_mask= get_proto_float_2(cur_mid_mask); //float // -1 -1
257+
::tensorflow::eas::ArrayProto proto_target = get_proto_float_2(cur_target); //float // -1 -1
258+
::tensorflow::eas::ArrayProto proto_sl = get_proto_int_1(cur_sl); // -1
259+
228260

229261
// setup request
230262
::tensorflow::eas::PredictRequest req;
@@ -244,7 +276,7 @@ int main(int argc, char** argv) {
244276
void *buffer1 = malloc(size);
245277
req.SerializeToArray(buffer1, size);
246278

247-
// ----------------------------------------------process and get feedback---------------------------------------------------
279+
// // -------------------------------------process and get feedback-----------------------------------------
248280
void* output = nullptr;
249281
int output_size = 0;
250282
state = process(model, buffer1, size, &output, &output_size);

modelzoo/features/EmbeddingVariable/WDL/SavedModel_Serving.md renamed to modelzoo/features/EmbeddingVariable/SavedModel_Serving.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ start_serving.cc provides functionality such that you can get serving result aft
4646
bazel build //serving/processor/tests:wdl_demo
4747
bazel-bin/serving/processor/tests/wdl_demo
4848
```
49+
50+
## Special procedure for model DIEN
51+
- Because the train/test dataset of DIEN differs considerably from that of other models such as BST, WDL and DeepFM, we provide generate_data.py to generate the serving data. Please provide data in the same format as train/test (local_test_splitByUser, uid_voc.pkl, mid_voc.pkl, cat_voc.pkl) and put the files under ./data/ (otherwise, please modify the paths in generate_data.py, lines 22-26).
52+
- Running generate_data.py will generate a test.csv file.
53+
- The next steps are similar to those in the previous two parts: modify start_serving.cc so that it points to the test.csv generated by generate_data.py, and serving will then be done automatically for you.
54+
4955

5056

5157

0 commit comments

Comments
 (0)