
Implement TensorFlow's next_batch for own data


The NumPy version

import numpy as np

class Dataset:
    def __init__(self, data):
        self._index_in_epoch = 0
        self._epochs_completed = 0
        self._data = data
        self._num_examples = data.shape[0]
        pass

    @property
    def data(self):
        return self._data

    def next_batch(self, batch_size, shuffle=True):
        # the shuffle argument is kept for parity with TensorFlow's next_batch;
        # this implementation always shuffles the data
        start = self._index_in_epoch
        if start == 0 and self._epochs_completed == 0:
            # shuffle once at the start of the first epoch
            idx = np.arange(0, self._num_examples)
            np.random.shuffle(idx)  # shuffle the indices
            self._data = self.data[idx]  # reorder the data with the shuffled indices

        # go to the data of next batch
        if start + batch_size > self._num_examples:
            # note: when start == self._num_examples, data_rest_part = np.array([])
            self._epochs_completed += 1
            # print(self.data)
            rest_num_examples = self._num_examples - start
            data_rest_part = self.data[start:self._num_examples]
            idx_update = np.arange(0, self._num_examples)
            np.random.shuffle(idx_update)
            self._data = self.data[idx_update]  # get another shuffled data

            start = 0
            self._index_in_epoch = batch_size - rest_num_examples
            end = self._index_in_epoch
            data_new_part = self._data[start:end]
            return np.concatenate((data_rest_part, data_new_part), axis=0)
        else:
            self._index_in_epoch += batch_size
            end = self._index_in_epoch
            return self._data[start:end]

dataset = Dataset(np.arange(0, 10))
for i in range(10):
    print(dataset.next_batch(6))
print(dataset.data)
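
For context, here is a minimal sketch of how the Dataset class above could feed a TensorFlow 1.x-style training loop through feed_dict. The placeholder x and the toy loss are illustrative assumptions, not part of the original code, and the sketch reuses the Dataset class defined above.

import numpy as np
import tensorflow as tf  # assumes TensorFlow 1.x (or tf.compat.v1 behaviour)

x = tf.placeholder(tf.float32, shape=[None])  # hypothetical input placeholder
loss = tf.reduce_mean(tf.square(x))           # toy objective, for illustration only

dataset = Dataset(np.arange(0, 10).astype(np.float32))
with tf.Session() as sess:
    for step in range(5):
        batch = dataset.next_batch(4)  # draw the next mini-batch
        print(sess.run(loss, feed_dict={x: batch}))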

The pandas version

import numpy as np
import pandas as pd
class Dataset:
    def __init__(self, data):
        self._index_in_epoch = 0
        self._epochs_completed = 0
        self._data = data
        self._num_examples = data.shape[0]
        pass

    @property
    def data(self):
        return self._data

    def next_batch(self, batch_size, shuffle=True):
        start = self._index_in_epoch
        if start == 0 and self._epochs_completed == 0:
            idx = np.arange(0, self._num_examples)
            np.random.shuffle(idx)  # shuffle index
            self._data = self.data.iloc[idx,:]  # get the shuffled data

        # go to the data of next batch
        if start + batch_size > self._num_examples:
            # note: when start == self._num_examples, data_rest_part is an empty DataFrame
            self._epochs_completed += 1
            # print(self.data) # this is for debug
            rest_num_examples = self._num_examples - start
            data_rest_part = self.data.iloc[start:self._num_examples,:]
            idx_update = np.arange(0, self._num_examples)
            np.random.shuffle(idx_update)
            self._data = self.data.iloc[idx_update,:]  # get another shuffled data

            start = 0
            self._index_in_epoch = batch_size - rest_num_examples
            end = self._index_in_epoch
            data_new_part = self._data.iloc[start:end,:]
            return pd.concat((data_rest_part, data_new_part), axis=0)
        else:
            self._index_in_epoch += batch_size
            end = self._index_in_epoch
            return self._data.iloc[start:end, :]

df = pd.DataFrame()
df['a'] = np.arange(10)
df['b'] = np.arange(10) * 10
dataset = Dataset(df)
for i in range(10):
    print(dataset.next_batch(5))
print(dataset.data)
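
TensorFlow's own next_batch (for example mnist.train.next_batch) returns a (features, labels) pair, so in practice both arrays have to be shuffled with the same permutation. The following is a minimal sketch of that extension; the class name DatasetWithLabels is hypothetical, and it assumes both inputs are NumPy arrays of equal length.

import numpy as np

class DatasetWithLabels:
    # hypothetical variant that shuffles features and labels together
    def __init__(self, data, labels):
        assert data.shape[0] == labels.shape[0]
        self._data = data
        self._labels = labels
        self._num_examples = data.shape[0]
        self._index_in_epoch = 0
        self._epochs_completed = 0

    def next_batch(self, batch_size):
        start = self._index_in_epoch
        if start == 0 and self._epochs_completed == 0:
            # shuffle once at the start of the first epoch, same permutation for both arrays
            idx = np.arange(self._num_examples)
            np.random.shuffle(idx)
            self._data, self._labels = self._data[idx], self._labels[idx]
        if start + batch_size > self._num_examples:
            # epoch boundary: keep the leftover examples, reshuffle, and top up the batch
            self._epochs_completed += 1
            rest = self._num_examples - start
            data_rest, labels_rest = self._data[start:], self._labels[start:]
            idx = np.arange(self._num_examples)
            np.random.shuffle(idx)
            self._data, self._labels = self._data[idx], self._labels[idx]
            self._index_in_epoch = batch_size - rest
            end = self._index_in_epoch
            return (np.concatenate((data_rest, self._data[:end])),
                    np.concatenate((labels_rest, self._labels[:end])))
        self._index_in_epoch += batch_size
        end = self._index_in_epoch
        return self._data[start:end], self._labels[start:end]

x = np.arange(10)
y = np.arange(10) * 10
pairs = DatasetWithLabels(x, y)
print(pairs.next_batch(4))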
