1. 程式人生 > >TensorFlow資料寫入為tfrecord,使用DataSet讀取

TensorFlow資料寫入為tfrecord,使用DataSet讀取

# Write (sample, label) pairs to a TFRecord file
def create_tf_record(inputs, labels, tfrecords_filename):
    """Serialize paired samples and labels into one TFRecord file.

    Every (sample, label) pair becomes a single ``tf.train.Example`` holding
    two bytes features:

    * ``'data'``  -- the sample converted to float32 and dumped as raw bytes.
    * ``'label'`` -- the label converted to int64 and dumped as raw bytes.

    Both features are raw byte strings so that a reader can recover them with
    ``tf.decode_raw`` (float32 for ``'data'``, int64 for ``'label'``) and
    reshape them afterwards; the array shapes themselves are not stored.

    Args:
        inputs: Iterable of array-likes, one per sample.
        labels: Iterable of array-likes, one per sample, aligned with
            ``inputs``. Iteration stops at the shorter of the two.
        tfrecords_filename: Path of the TFRecord file to create.
    """
    # The context manager closes the file even if serialization fails midway.
    with tf.python_io.TFRecordWriter(tfrecords_filename) as writer:
        for sample, label in zip(inputs, labels):
            # Pin the dtypes explicitly: the byte layout must match the dtype
            # the reader passes to tf.decode_raw, otherwise values are garbage.
            raw = np.asarray(sample, dtype=np.float32).tobytes()
            label_raw = np.asarray(label, dtype=np.int64).tobytes()

            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'label': tf.train.Feature(
                        bytes_list=tf.train.BytesList(value=[label_raw])),
                    'data': tf.train.Feature(
                        bytes_list=tf.train.BytesList(value=[raw])),
                }))
            writer.write(example.SerializeToString())

# Read the TFRecord files batch by batch with the tf.data Dataset API
def get_batch(batchSize=10):
    """Build an input pipeline over the TFRecord files and return one batch op.

    Each record is expected to carry two raw-bytes features: ``'data'``
    (float32 values, 40*40*3 of them) and ``'label'`` (int64 values, 10 of
    them).

    Args:
        batchSize: Number of examples per batch.

    Returns:
        A ``(data, label)`` pair of tensors produced by the iterator's
        ``get_next()``; evaluating them yields the next batch. ``data`` has
        shape ``[batch, 40, 40, 3]`` and ``label`` has shape ``[batch, 10]``.
    """
    # Decode one serialized tf.train.Example
    def parser(record):
        # Both features are stored as raw byte strings; tf.decode_raw only
        # accepts string tensors, so 'label' must be parsed as tf.string too.
        features = tf.parse_single_example(
            record,
            features={
                'label': tf.FixedLenFeature([], tf.string),
                'data': tf.FixedLenFeature([], tf.string),
            })
        data = tf.decode_raw(features['data'], tf.float32)
        label = tf.decode_raw(features['label'], tf.int64)

        # decode_raw yields flat vectors; restore the per-example shapes
        data = tf.reshape(data, [40, 40, 3])
        label = tf.reshape(label, [10])

        return data, label

    # Input file paths
    tfrecords_filenames = ['../tdrecord/1.tfrecords', '../tdrecord/2.tfrecords']
    dataset = tf.data.TFRecordDataset(tfrecords_filenames)
    dataset = dataset.map(parser)
    # Shuffle with a 500-element buffer, repeat the data 100 times, and emit
    # batchSize examples per step
    dataset = dataset.shuffle(500).repeat(100).batch(batchSize)
    # Create the iterator that produces the batches
    iterator = dataset.make_one_shot_iterator()

    data, label = iterator.get_next()
    return data, label