Skip to content

Commit 3c87f7c

Browse files
committed
Add script to generate and print boston housing dataset
1 parent 9fdc628 commit 3c87f7c

File tree

4 files changed

+78
-174
lines changed

4 files changed

+78
-174
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env python
2+
3+
import tensorflow as tf
4+
import os
5+
6+
7+
def generate_tfrecords(input_filename, output_filename):
    """Convert a CSV file into a TFRecords file of ``tf.train.Example`` protos.

    The first line of the CSV is treated as a header and skipped. For every
    other non-blank row, column 14 is stored as the float feature ``"label"``
    and columns 1-13 as the float-list feature ``"features"``; column 0 is
    not written.

    Args:
        input_filename: Path of the CSV file to read.
        output_filename: Path of the TFRecords file to create.

    Raises:
        IndexError: If a non-blank data row has fewer than 15 columns.
        ValueError: If a label or feature column cannot be parsed as a float.
    """
    print("Start to convert {} to {}".format(input_filename, output_filename))
    writer = tf.python_io.TFRecordWriter(output_filename)

    # try/finally guarantees the writer is closed (and its output flushed)
    # even when a malformed row raises part-way through the file.
    try:
        # The context manager closes the CSV handle, which the previous
        # bare open() call inside the for statement never did.
        with open(input_filename, "r") as csv_file:
            # Ignore the first line (header).
            next(csv_file, None)

            for line in csv_file:
                # Tolerate blank lines, e.g. an extra newline at end of file.
                if not line.strip():
                    continue

                data = line.split(",")
                label = float(data[14])
                features = [float(i) for i in data[1:14]]

                example = tf.train.Example(features=tf.train.Features(feature={
                    "label":
                    tf.train.Feature(float_list=tf.train.FloatList(value=[label])),
                    "features":
                    tf.train.Feature(float_list=tf.train.FloatList(value=features)),
                }))
                writer.write(example.SerializeToString())
    finally:
        writer.close()

    print("Successfully convert {} to {}".format(input_filename,
                                                 output_filename))
34+
35+
36+
def main():
    """Convert every ``.csv`` file in the current working directory.

    Each matching file name is passed to ``generate_tfrecords`` together with
    an output name formed by appending ``.tfrecords`` to it.
    """
    current_path = os.getcwd()
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # conversion order (and the log output) deterministic.
    for filename in sorted(os.listdir(current_path)):
        # The extension is the only real filter: the former
        # filename.startswith("") test was true for every name.
        if filename.endswith(".csv"):
            generate_tfrecords(filename, filename + ".tfrecords")
41+
42+
43+
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env python
2+
3+
import tensorflow as tf
4+
import os
5+
6+
7+
def print_tfrecords(input_filename, max_print_number=100):
    """Print the leading records of a TFRecords file.

    Each record is parsed as a ``tf.train.Example`` and its ``"label"`` and
    ``"features"`` float lists are printed with a zero-based record number.

    Args:
        input_filename: Path of the TFRecords file to read.
        max_print_number: Maximum number of records to print. Defaults to
            100.
    """
    records = tf.python_io.tf_record_iterator(input_filename)
    for current_print_number, serialized_example in enumerate(records):
        # Return (rather than exit(), which would terminate the whole
        # interpreter) once the limit is reached. Checking before printing
        # caps the output at exactly max_print_number records; the earlier
        # post-increment ">" check let one extra record through.
        if current_print_number >= max_print_number:
            return

        # Get serialized example from file
        example = tf.train.Example()
        example.ParseFromString(serialized_example)
        label = example.features.feature["label"].float_list.value
        features = example.features.feature["features"].float_list.value
        print("Number: {}, label: {}, features: {}".format(current_print_number,
                                                           label, features))
24+
25+
26+
def main():
    """Print the records of ``train.csv.tfrecords`` in the working directory."""
    input_filename = os.path.join(os.getcwd(), "train.csv.tfrecords")
    print_tfrecords(input_filename)
31+
32+
33+
# Print the records only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

data/boston_housing/test.csv

Lines changed: 0 additions & 174 deletions
This file was deleted.
34.8 KB
Binary file not shown.

0 commit comments

Comments
 (0)