Skip to content

Commit 6253e98

Browse files
author
SaerdnaPp
committed
tf input csv
1 parent 14b570f commit 6253e98

5 files changed

+128
-20
lines changed

0001_merge_file_line_num.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
# -*- coding:utf-8 -*-
22

3-
f = open("data/merge/allRawData.txt")
3+
# Inspect the merged raw-data file: print the number of space-separated
# fields on the first line, print the last of those fields, then report
# the total number of lines in the file.
with open("data/merge/allRawData.txt", "r") as f:
    first_line = f.readline()
    # Named `fields` (not `str`) so the builtin `str` is not shadowed.
    fields = first_line.split(" ")
    print(len(fields))
    print(fields[-1])

    # readline() above already consumed the first line, so it has to be
    # counted here; otherwise the reported total is one short.
    line_num = 1 if first_line else 0
    for line in f:
        line_num += 1

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("File has %i lines" % (line_num))

0501_tf_input_csv.py → 0501_tf_input_csv_test.py (renamed)

+17-15
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,21 @@
22

33
"""
44
@author: Songgx
5-
@file: 0501_tf_input_csv.py
5+
@file: 0501_tf_input_csv_test.py
66
@time: 11/28/16 4:20 PM
77
"""
88

99
from __future__ import print_function
1010
import tensorflow as tf
1111

12+
1213
def file_len(fname):
    """Return the number of lines in the text file at ``fname``.

    Args:
        fname: Path of the file to read.

    Returns:
        The line count as an int. An empty file yields 0; a final line
        without a trailing newline still counts as one line.

    Raises:
        IOError/OSError: if the file cannot be opened.
    """
    with open(fname) as f:
        # Counting with sum() avoids relying on a loop variable that is
        # never bound when the file has no lines at all.
        return sum(1 for _ in f)
1718

19+
1820
filename = "data/csv_test_data.csv"
1921

2022
# setup text reader
@@ -24,25 +26,25 @@ def file_len(fname):
2426
_, csv_row = reader.read(filename_queue)
2527

2628
# setup CSV decoding
27-
record_defaults = [[0],[0],[0],[0],[0]]
28-
col1,col2,col3,col4,col5 = tf.decode_csv(csv_row, record_defaults=record_defaults)
29+
record_defaults = [[0], [0], [0], [0], [0]]
30+
col1, col2, col3, col4, col5 = tf.decode_csv(csv_row, record_defaults=record_defaults, field_delim=" ")
2931

3032
# turn features back into a tensor
31-
features = tf.pack([col1,col2,col3,col4])
33+
features = tf.pack([col1, col2, col3, col4])
3234

3335
print("loading, " + str(file_length) + " line(s)\n")
3436
with tf.Session() as sess:
35-
tf.initialize_all_variables().run()
37+
tf.initialize_all_variables().run()
3638

37-
# start populating filename queue
38-
coord = tf.train.Coordinator()
39-
threads = tf.train.start_queue_runners(coord=coord)
39+
# start populating filename queue
40+
coord = tf.train.Coordinator()
41+
threads = tf.train.start_queue_runners(coord=coord)
4042

41-
for i in range(file_length):
42-
# retrieve a single instance
43-
example, label = sess.run([features, col5])
44-
print(example, label)
43+
for i in range(file_length):
44+
# retrieve a single instance
45+
example, label = sess.run([features, col5])
46+
print(example, label)
4547

46-
coord.request_stop()
47-
coord.join(threads)
48-
print("\ndone loading")
48+
coord.request_stop()
49+
coord.join(threads)
50+
print("\ndone loading")

0502_tf_input_merge_file.py

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# -*- coding:utf-8 -*-
2+
3+
"""
4+
@author: Songgx
5+
@file: 0502_tf_input_merge_file.py
6+
@time: 11/28/16 5:04 PM
7+
"""
8+
9+
from __future__ import print_function
10+
import tensorflow as tf
11+
12+
13+
def file_len(fname):
    """Count the lines of the text file located at ``fname``.

    Args:
        fname: Path of the file to read.

    Returns:
        Number of lines as an int; 0 for an empty file. A last line that
        lacks a trailing newline is still counted.

    Raises:
        IOError/OSError: if the file cannot be opened.
    """
    count = 0
    with open(fname) as f:
        # Start from an explicit 0 so an empty file returns 0 instead of
        # failing on an unbound loop variable.
        for _ in f:
            count += 1
    return count
18+
19+
20+
filename = "data/csv_test_data.csv"

# Number of leading lines the reader skips. Kept in one place so the
# read loop below can account for it.
header_lines = 1

# setup text reader
file_length = file_len(filename)
filename_queue = tf.train.string_input_producer([filename])
reader = tf.TextLineReader(skip_header_lines=header_lines)
_, csv_row = reader.read(filename_queue)

# setup CSV decoding: five columns, each defaulting to 0, separated by a
# single space instead of a comma
record_defaults = [[0], [0], [0], [0], [0]]
col1, col2, col3, col4, col5 = tf.decode_csv(
    csv_row, record_defaults=record_defaults, field_delim=" ")

# turn features back into a tensor (first four columns; col5 is fetched
# separately as the label)
features = tf.pack([col1, col2, col3, col4])

# The reader skips `header_lines` lines, so only this many records exist
# per pass over the file. Looping `file_length` times would silently
# wrap into the next pass and fetch an already-seen record.
record_count = max(file_length - header_lines, 0)

print("loading, " + str(file_length) + " line(s)\n")
with tf.Session() as sess:
    tf.initialize_all_variables().run()

    # start populating filename queue
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        for i in range(record_count):
            # retrieve a single instance
            example, label = sess.run([features, col5])
            print(example, label)
    finally:
        # Always stop and join the queue-runner threads, even when a
        # read fails, so the process does not hang on exit.
        coord.request_stop()
        coord.join(threads)
    print("\ndone loading")

data/add_class_in_each_row.py

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# -*- coding:utf-8 -*-
2+
3+
"""
4+
@author: Songgx
5+
@file: add_class_in_each_row.py
6+
@time: 11/28/16 7:20 PM
7+
"""
8+
9+
TOTAL_ROW_NUM = 1000
# Consecutive rows that share one class label.
ROWS_PER_CLASS = 100

# line 0-999
# line 0-99 class 0
# line 100-199 class 1
# ...
# line 900-999 class 9

class_num = 0
line_num = 0

# Copy every row of the merged raw data and append its class label as a
# new last column. `with` closes both files even if a write fails.
with open("merge/allRawData.txt", "r") as fr, \
        open("merge/raw_data.txt", "w") as fw:
    for line in fr:
        # Appending straight to `line` would place the label after the
        # line terminator, so strip "\n" and "\r" first and re-add "\n".
        fw.write(line.strip() + " " + str(class_num) + "\n")
        line_num += 1
        if line_num % ROWS_PER_CLASS == 0:
            # A full group of rows is written: move on to the next class.
            class_num += 1
            print("%i / %i lines finished." % (line_num, TOTAL_ROW_NUM))

# Spot-check the output: show the tail of the rows on either side of
# each class boundary (1-based line numbers 99/100/101, 199/200/201, ...).
print("Verify new file:")
with open("merge/raw_data.txt", "r") as fr1:
    for i in range(TOTAL_ROW_NUM):
        line_num1 = i + 1
        # Only the last 10 characters are needed to see the label.
        tail = fr1.readline()[-10:]
        near_boundary = (
            (line_num1 + 1) % ROWS_PER_CLASS == 0
            or (line_num1 - 1) % ROWS_PER_CLASS == 0
            or line_num1 % ROWS_PER_CLASS == 0
        )
        if near_boundary:
            print("line-" + str(line_num1) + ":" + tail.strip())
print("Finished.")
42+
43+
44+
45+
46+
47+
48+
49+
50+
51+

data/csv_test_data.csv

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
0,0,0,0,0
2-
0,15,0,0,0
3-
0,30,0,0,0
4-
0,45,0,0,0
1+
0 15 0 0 0
2+
0 30 0 0 0
3+
0 45 0 0 0
4+
0 78 0 0 0
5+
0 65 0 12 1

0 commit comments

Comments
 (0)