TensorFlow資料寫入為tfrecord,使用DataSet讀取
阿新 • • 發佈:2018-12-17
# Write samples to a TFRecord file.
def create_tf_record(inputs, labels, tfrecords_filename):
    """Serialize paired samples and labels into a single TFRecord file.

    Each (sample, label) pair is written as one ``tf.train.Example`` with
    two bytes features:

    * ``'data'``  -- the sample converted to float32 and dumped as raw bytes.
    * ``'label'`` -- the label converted to int64 and dumped as raw bytes.

    A reader recovers them by parsing both features as ``tf.string`` and
    calling ``tf.decode_raw`` with ``tf.float32`` / ``tf.int64``.

    Args:
        inputs: Iterable of array-like samples.
        labels: Iterable of array-like integer labels, parallel to
            ``inputs``. Pairing uses ``zip``, so extra items in the longer
            iterable are ignored.
        tfrecords_filename: Path of the TFRecord file to create.
    """
    # The context manager closes the writer even if serialization fails.
    with tf.python_io.TFRecordWriter(tfrecords_filename) as writer:
        for sample, sample_label in zip(inputs, labels):
            # Serialize only the current sample; the dtype is pinned so the
            # byte layout does not depend on what the caller passed in.
            raw = np.asarray(sample, dtype=np.float32).tobytes()
            label_raw = np.asarray(sample_label, dtype=np.int64).tobytes()
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    # Raw bytes must go in a BytesList; an Int64List cannot
                    # hold a bytes value.
                    'label': tf.train.Feature(
                        bytes_list=tf.train.BytesList(value=[label_raw])),
                    'data': tf.train.Feature(
                        bytes_list=tf.train.BytesList(value=[raw])),
                }))
            writer.write(example.SerializeToString())
# Read batches with the Dataset API.
def get_batch(batchSize=10):
    """Build a shuffled, repeated, batched input pipeline over TFRecord files.

    Every record is expected to carry two bytes features, ``'data'`` (raw
    float32 bytes of a 40x40x3 array) and ``'label'`` (raw int64 bytes of a
    length-10 vector).

    Args:
        batchSize: Number of examples per batch.

    Returns:
        A ``(data, label)`` pair of tensors produced by a one-shot iterator.
        ``data`` is float32 shaped ``[batch, 40, 40, 3]`` and ``label`` is
        int64 shaped ``[batch, 10]``.
    """
    # Parse one serialized tf.train.Example.
    def parser(record):
        features = tf.parse_single_example(
            record,
            features={
                # Both features are raw byte strings. tf.decode_raw only
                # accepts string tensors, so 'label' must be parsed as
                # tf.string rather than tf.int64.
                'label': tf.FixedLenFeature([], tf.string),
                'data': tf.FixedLenFeature([], tf.string),
            })
        data = tf.decode_raw(features['data'], tf.float32)
        label = tf.decode_raw(features['label'], tf.int64)
        # decode_raw yields flat vectors; restore the intended shapes.
        data = tf.reshape(data, [40, 40, 3])
        label = tf.reshape(label, [10])
        return data, label

    # Input file paths.
    tfrecords_filenames = ['../tdrecord/1.tfrecords', '../tdrecord/2.tfrecords']
    dataset = tf.data.TFRecordDataset(tfrecords_filenames)
    dataset = dataset.map(parser)
    # Keep a 500-element shuffle buffer, repeat the data 100 times, and emit
    # batchSize examples per step (the batch size was previously hard-coded
    # to 32 and the parameter ignored).
    dataset = dataset.shuffle(500).repeat(100).batch(batchSize)
    # Produce the tensors that yield the next batch.
    iterator = dataset.make_one_shot_iterator()
    data, label = iterator.get_next()
    return data, label