
Basic Usage of TensorFlow Datasets

Must-read official guides (read in order): TensorFlow Datasets and Estimators

Title | Description | Date
Introduction to TensorFlow Datasets and Estimators | Google Developers Blog, Part 1 | 2017-09-12
Introducing TensorFlow Feature Columns | Google Developers Blog, Part 2 | 2017-11-20
Creating Custom Estimators in TensorFlow | Google Developers Blog, Part 3 | 2017-12-19
Classifying text with TensorFlow Estimators | Part 4 | 2018-03-07

TFRecord in Detail

Example code: GenerateSimpleTFRecord.py

# Demonstration of creating a TFRecord file with simple data types like int, float, string

import tensorflow as tf
import numpy as np

data_arr = [
    {
        'int_data': 108,
        'float_data': 2.45,
        'str_data': 'String 100',
        'float_list_data': [256.78, 13.9],
        'image_data': np.random.uniform(0, 255, size=(3, 4, 2))
    },
    {
        'int_data': 37,
        'float_data': 84.3,
        'str_data': 'String 200',
        'float_list_data': [1.34, 843.9, 65.22],
        'image_data': np.random.uniform(0, 255, size=(3, 4, 2))
    }
]

tf.reset_default_graph()


def get_example_object(data_record):
    # Convert individual data into a list of int64 or float or bytes
    int_list1 = tf.train.Int64List(value=[data_record['int_data']])
    float_list1 = tf.train.FloatList(value=[data_record['float_data']])
    # Convert string data into a list of bytes
    str_list1 = tf.train.BytesList(value=[data_record['str_data'].encode('utf-8')])
    float_list2 = tf.train.FloatList(value=data_record['float_list_data'])
    image_list = tf.train.BytesList(value=[data_record['image_data'].tostring()])

    # Create a dictionary with the above lists individually wrapped in Feature
    feature_key_value_pair = {
        'int_list1': tf.train.Feature(int64_list=int_list1),
        'float_list1': tf.train.Feature(float_list=float_list1),
        'str_list1': tf.train.Feature(bytes_list=str_list1),
        'float_list2': tf.train.Feature(float_list=float_list2),
        'image_list': tf.train.Feature(bytes_list=image_list)
    }

    # Create a Features object with the above feature dictionary
    features = tf.train.Features(feature=feature_key_value_pair)

    # Create an Example object with the features
    example = tf.train.Example(features=features)
    return example


with tf.python_io.TFRecordWriter('example.tfrecord') as tfwriter:
    # Iterate through all records
    for data_record in data_arr:
        example = get_example_object(data_record)

        # Append each example to the tfrecord
        tfwriter.write(example.SerializeToString())
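
A quick way to sanity-check the file just written is to iterate over the raw records and parse them back into tf.train.Example protos. A minimal sketch, not part of the original example:

# Sanity check: read the raw records back and print the stored feature keys
for serialized in tf.python_io.tf_record_iterator('example.tfrecord'):
    example = tf.train.Example()
    example.ParseFromString(serialized)
    print(sorted(example.features.feature.keys()))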

Example code: ExtractSimpleTFRecord.py

# Demonstration of extracting a TFRecord file with simple data types like int, float, string

import tensorflow as tf

tf.reset_default_graph()


def extract_fn(data_record):
    features = {
        # Extract features using the keys set during creation
        'int_list1': tf.FixedLenFeature([], tf.int64),
        'float_list1': tf.FixedLenFeature([], tf.float32),
        'str_list1': tf.FixedLenFeature([], tf.string),
        # If the size differs across records, use VarLenFeature
        'float_list2': tf.VarLenFeature(tf.float32),
        'image_list': tf.FixedLenFeature([], tf.string)
    }
    sample = tf.parse_single_example(data_record, features)
    sample['float_list2'] = tf.sparse.to_dense(sample['float_list2'])
    sample['image_list'] = tf.decode_raw(sample['image_list'], tf.float64)
    sample['image_list'] = tf.reshape(sample['image_list'], (3, 4, 2))
    return sample


# Initialize all tfrecord paths
dataset = tf.data.TFRecordDataset(['example.tfrecord'])
dataset = dataset.map(extract_fn)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    try:
        while True:
            data_record = sess.run(next_element)
            print(data_record)
    except tf.errors.OutOfRangeError:
        # Reached the end of the dataset
        pass
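
Note that float_list2 has a different length in each record, so batching the parsed dataset with a plain batch() would fail on the ragged dimension. One way to handle this is padded_batch; the sketch below is an assumption layered on the example above (the padded shapes mirror the data written earlier):

# Sketch: batch records whose 'float_list2' lengths differ by zero-padding them
batched = dataset.padded_batch(
    2,
    padded_shapes={
        'int_list1': [],           # scalars need no padding
        'float_list1': [],
        'str_list1': [],
        'float_list2': [None],     # pad the variable-length float list
        'image_list': [3, 4, 2]    # fixed shape after reshape in extract_fn
    })
batched_element = batched.make_one_shot_iterator().get_next()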

TFRecord Basics: Workflow Summary

For a complete example, see the code above.

Producing TFRecord files

1. Open TFRecordWriter

writer = tf.python_io.TFRecordWriter(tfrecord_store_path)

2. Prepare the feature_dict

for index, row in pandas_dataframe_data.iterrows():
    dict_row = dict(row)
    feature_dict = {}
    # prepare the feature_dict
    for k, v in dict_row.items():
        if k in ['feature1', 'feature2',...,'featureN']:    # float columns
            feature_dict[k] = tf.train.Feature(float_list=tf.train.FloatList(value=[v]))
        elif k in ['feature1', 'feature2',...,'featureN']:  # int columns
            feature_dict[k] = tf.train.Feature(int64_list=tf.train.Int64List(value=[v]))
        else:                                               # string columns (BytesList needs bytes)
            feature_dict[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(v).encode('utf-8')]))

3. Produce a data Example

example = tf.train.Example(features=tf.train.Features(feature=feature_dict))

4. Serialize the Example to a string

serialized = example.SerializeToString()

5. Write the serialized example to the TFRecord file

writer.write(serialized)

6. Close TFRecordWriter

writer.close()
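
Putting the six steps together, a minimal end-to-end sketch; the DataFrame df, its column names, and the file name train.tfrecord are hypothetical placeholders:

import pandas as pd
import tensorflow as tf

# Hypothetical DataFrame with one float column and one int column
df = pd.DataFrame({'feature1': [1.5, 2.5], 'feature2': [3, 4]})

writer = tf.python_io.TFRecordWriter('train.tfrecord')        # step 1
for _, row in df.iterrows():
    feature_dict = {                                          # step 2
        'feature1': tf.train.Feature(float_list=tf.train.FloatList(value=[row['feature1']])),
        'feature2': tf.train.Feature(int64_list=tf.train.Int64List(value=[int(row['feature2'])]))
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))  # step 3
    writer.write(example.SerializeToString())                 # steps 4 and 5
writer.close()                                                # step 6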

Using TFRecord files

1. Open TFRecordDataset

filenames = ['tfrecord_file_name1','tfrecord_file_name2']
tf_dataset = tf.data.TFRecordDataset(filenames)

2. Parse dataset

def _parse_function(record):
    features = {"feature1": tf.FixedLenFeature((), tf.float32, default_value=0.0),
                "feature2": tf.FixedLenFeature((), tf.int64, default_value=0)}
    parsed_features = tf.parse_single_example(record, features)
    # Return a (features, label) pair; here feature2 is used as the label
    return {"feature1": parsed_features["feature1"]}, parsed_features["feature2"]

dataset = tf_dataset.map(_parse_function)

3. Shuffle, repeat, and batch the examples

tf_dataset = dataset.shuffle(1000).repeat().batch(batch_size)

4. Iterate over the dataset

tf_iterator = tf_dataset.make_one_shot_iterator()
next_element = tf_iterator.get_next()
with tf.Session() as sess:
    for i in range(show_numbers):
        a_data = sess.run(next_element)
        print(a_data)
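
Following the Datasets and Estimators guides listed above, this pipeline is usually wrapped in an input_fn and handed to an Estimator. A minimal sketch, assuming the _parse_function from step 2 and hypothetical file names, batch size, and model:

def train_input_fn():
    # Build the whole input pipeline inside the input_fn, as Estimators expect
    files = ['tfrecord_file_name1', 'tfrecord_file_name2']
    ds = tf.data.TFRecordDataset(files)
    ds = ds.map(_parse_function)
    ds = ds.shuffle(1000).repeat().batch(32)
    return ds

# Hypothetical usage with a linear model on the single numeric feature:
# feature_columns = [tf.feature_column.numeric_column('feature1')]
# estimator = tf.estimator.LinearClassifier(feature_columns=feature_columns)
# estimator.train(input_fn=train_input_fn, steps=1000)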

Blog posts and references related to TFRecord

Title | Description | Date
convert_to_records.py | tensorflow/tensorflow/examples/how_tos/reading_data/convert_to_records.py | 2017-12-01
fully_connected_reader.py | tensorflow/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py | 2018-12-03
导入数据 (Importing Data) | TensorFlow Guide | continuously updated
TensorFlow中层API:Datasets+TFRecord的数据导入 | corresponding code: TensorFlow Dataset + TFRecords.ipynb | 2018-01-26
Tensorflow-tf-estimator | lanhong | 2018-11-03
TFRecord_Images | Illustration of how to create TFRecord with images and pipelined into Datasets and Iterators | 2018-07-23