tensorflow文字讀取---My way of AI17

阿新 • • 發佈：2018-12-18

流程

tensorflow可以讀取文字檔案，二進位制檔案，圖片檔案和tfrecords檔案。除了閱讀器不一樣，讀檔案的流程是差不多的。

1.構造檔案佇列 2.讀取檔案內容 3.解碼 4.批處理

import tensorflow as tf
import os


# 模擬一下同步先處理資料，然後才能取資料訓練
# tensorflow當中，執行操作有依賴性

# # 1、首先定義佇列
# Q = tf.FIFOQueue(3, tf.float32)
#
# # 放入一些資料
# enq_many = Q.enqueue_many([[0.1, 0.2, 0.3], ])
#
# # 2、定義一些處理資料的邏輯，取資料的過程      取資料，+1， 入佇列 

#
# out_q = Q.dequeue()
#
# data = out_q + 1
#
# en_q = Q.enqueue(data)
#
# with tf.Session() as sess:
#     # 初始化佇列
#     sess.run(enq_many)
#
#     # 處理資料
#     for i in range(100):
#         sess.run(en_q)
#
#     # 訓練資料
#     for i in range(Q.size().eval()):
#         print(sess.run(Q.dequeue())) 



# 模擬非同步子執行緒 存入樣本， 主執行緒 讀取樣本

# # 1、定義一個佇列，1000
# Q = tf.FIFOQueue(1000, tf.float32)
#
# # 2、定義要做的事情 迴圈 值，+1， 放入隊列當中
# var = tf.Variable(0.0)
#
# # 實現一個自增  tf.assign_add
# data = tf.assign_add(var, tf.constant(1.0))
#
# en_q = Q.enqueue(data)
#
# # 3、定義佇列管理器op, 指定多少個子執行緒，子執行緒該幹什麼事情
# qr = tf.train.QueueRunner(Q, enqueue_ops=[en_q] * 2) 

#
# # 初始化變數的OP
# init_op = tf.global_variables_initializer()
#
# with tf.Session() as sess:
#     # 初始化變數
#     sess.run(init_op)
#
#     # 開啟執行緒管理器
#     coord = tf.train.Coordinator()
#
#     # 真正開啟子執行緒
#     threads = qr.create_threads(sess, coord=coord, start=True)
#
#     # 主執行緒，不斷讀取資料訓練
#     for i in range(300):
#         print(sess.run(Q.dequeue()))
#
#     # 回收你
#     coord.request_stop()
#
#     coord.join(threads)


# 批處理大小，跟佇列，資料的數量沒有影響，只決定 這批次取多少資料


def csvread(filelist, batch_size=9, num_threads=1):
    """
    Read two-column CSV files line by line and return the columns in batches.

    :param filelist: list of CSV file paths (directory + file name)
    :param batch_size: number of rows per returned batch; also used as the
        batching queue capacity. Defaults to 9, the previously hard-coded value.
    :param num_threads: number of threads enqueuing rows. Defaults to 1.
    :return: (example_batch, label_batch), the two CSV columns as batched tensors
    """
    # 1. Build the queue of file names
    file_queue = tf.train.string_input_producer(filelist)

    # 2. Build a line reader; each read() yields one line of one file
    reader = tf.TextLineReader()
    _, value = reader.read(file_queue)

    # 3. Decode each line.
    # record_defaults fixes the type of every column and its default value,
    # e.g. [["None"], [4.0]] would mean (string, float). Here both are strings.
    record_defaults = [["None"], ["None"]]
    example, label = tf.decode_csv(value, record_defaults=record_defaults)

    # 4. Batch the rows so that one run returns several samples.
    # The batch size only decides how many rows one run returns; it is
    # independent of how many rows the files contain.
    example_batch, label_batch = tf.train.batch(
        [example, label],
        batch_size=batch_size,
        num_threads=num_threads,
        capacity=batch_size,
    )

    print(example_batch, label_batch)
    return example_batch, label_batch


def picread(filelist):
    """
    Read JPEG image files and return them as a batch of fixed-size tensors.

    :param filelist: list of image file paths (directory + file name)
    :return: image_batch, a batch of 20 images resized to 200x200 with 3 channels
    """
    # 1. Build the queue of file names
    file_queue = tf.train.string_input_producer(filelist)

    # 2. Build a reader that returns the whole content of one file per read()
    reader = tf.WholeFileReader()
    _, value = reader.read(file_queue)

    print(value)

    # 3. Decode the raw bytes into an image tensor.
    # channels=3 forces RGB output: without it a grayscale JPEG decodes to a
    # single channel and later conflicts with the [200, 200, 3] shape below.
    image = tf.image.decode_jpeg(value, channels=3)

    print(image)

    # 4. Resize every image to a common size
    image_resize = tf.image.resize_images(image, [200, 200])

    print(image_resize)

    # 5. Pin the static shape: batching requires every sample to have a
    # fully defined shape.
    image_resize.set_shape([200, 200, 3])

    print(image_resize)

    # 6. Batch the images
    image_batch = tf.train.batch([image_resize], batch_size=20, num_threads=1, capacity=20)

    print(image_batch)

    return image_batch


# Command-line flags for the CIFAR data location and the TFRecords file.
# FLAGS exposes the parsed values as attributes (FLAGS.cifar_dir, ...).
FLAGS = tf.app.flags.FLAGS

# cifar_dir: directory holding the CIFAR binary files (help text: "directory of the files")
tf.app.flags.DEFINE_string("cifar_dir", "./data/cifar10/cifar-10-batches-bin/", "檔案的目錄")
# cifar_tfrecords: path of the TFRecords file (help text: "file to store tfrecords into")
tf.app.flags.DEFINE_string("cifar_tfrecords", "./tmp/cifar.tfrecords", "存進tfrecords的檔案")


class CifarRead(object):
    """Read CIFAR binary files, write samples to TFRecords, and read TFRecords back.
    """
    def __init__(self, filelist):
        """
        :param filelist: list of binary file paths (directory + file name)
        """
        # List of files to read
        self.file_list = filelist

        # Geometry of every image
        self.height = 32
        self.width = 32
        self.channel = 3
        # Byte layout of one record in the binary file:
        # label bytes first, then the image bytes
        self.label_bytes = 1
        self.image_bytes = self.height * self.width * self.channel
        self.bytes = self.label_bytes + self.image_bytes

    def read_and_decode(self):
        """
        Read fixed-length records from the binary files and split them into
        image and label batches.

        :return: (image_batch, label_batch); images are uint8 with shape
            [10, height, width, channel], labels are int32 with shape [10, 1]
        """
        # 1. Build the queue of file names
        file_queue = tf.train.string_input_producer(self.file_list)

        # 2. Build a fixed-length reader; one record is label + image bytes
        reader = tf.FixedLengthRecordReader(self.bytes)

        key, value = reader.read(file_queue)

        # 3. Decode the raw record bytes into a flat uint8 vector
        label_image = tf.decode_raw(value, tf.uint8)

        print(label_image)

        # 4. Split the vector into the label (target) and the image (features)
        label = tf.cast(tf.slice(label_image, [0], [self.label_bytes]), tf.int32)

        image = tf.slice(label_image, [self.label_bytes], [self.image_bytes])

        # 5. Restore the image shape. The CIFAR-10 binary format stores the
        # pixels channel by channel (all red, then green, then blue), so the
        # flat vector is [channel, height, width]; reshaping it straight to
        # [height, width, channel] would scramble the pixels. Reshape to the
        # stored layout first, then move the channel axis last.
        image_chw = tf.reshape(image, [self.channel, self.height, self.width])
        image_reshape = tf.transpose(image_chw, [1, 2, 0])

        print(label, image_reshape)
        # 6. Batch the samples
        image_batch, label_batch = tf.train.batch([image_reshape, label], batch_size=10, num_threads=1, capacity=10)

        print(image_batch, label_batch)
        return image_batch, label_batch

    def write_ro_tfrecords(self, image_batch, label_batch):
        """
        Store one batch of images and labels in the TFRecords file named by
        FLAGS.cifar_tfrecords.

        Must be called while a session is the default session (inside a
        ``with tf.Session():`` block) and the queue runners are started.

        :param image_batch: batched image tensor (features)
        :param label_batch: batched label tensor (targets), one row per image
        :return: None
        :raises ValueError: if there is no default session
        """
        session = tf.get_default_session()
        if session is None:
            raise ValueError(
                "write_ro_tfrecords needs a default session; "
                "call it inside a `with tf.Session():` block")

        # Fetch images and labels in ONE run. Evaluating image_batch[i] and
        # label_batch[i] separately dequeues a new batch on every call, so the
        # stored image and label would come from different batches.
        images, labels = session.run([image_batch, label_batch])

        # 1. Create the TFRecord writer
        writer = tf.python_io.TFRecordWriter(FLAGS.cifar_tfrecords)

        try:
            # 2. Write every sample; each one is wrapped in an Example protocol
            for image, label in zip(images, labels):
                example = tf.train.Example(features=tf.train.Features(feature={
                    "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image.tobytes()])),
                    "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[int(label[0])])),
                }))

                # Write the single serialized sample
                writer.write(example.SerializeToString())
        finally:
            # Close the file even if building or writing a sample fails
            writer.close()
        return None

    # Correctly spelled alias; the original name is kept for existing callers.
    write_to_tfrecords = write_ro_tfrecords

    def read_from_tfrecords(self):
        """
        Read samples back from the TFRecords file named by FLAGS.cifar_tfrecords.

        :return: (image_batch, label_batch); images are uint8 with shape
            [10, height, width, channel], labels are int32 with shape [10]
        """
        # 1. Build the queue of file names
        file_queue = tf.train.string_input_producer([FLAGS.cifar_tfrecords])

        # 2. Build the reader; value is one serialized Example
        reader = tf.TFRecordReader()

        key, value = reader.read(file_queue)

        # 3. Parse the Example
        features = tf.parse_single_example(value, features={
            "image": tf.FixedLenFeature([], tf.string),
            "label": tf.FixedLenFeature([], tf.int64),
        })

        # 4. Decode. String features must be decoded from raw bytes;
        # int64 / float32 features need no decoding.
        image = tf.decode_raw(features["image"], tf.uint8)

        # Fix the image shape so the samples can be batched
        image_reshape = tf.reshape(image, [self.height, self.width, self.channel])

        label = tf.cast(features["label"], tf.int32)

        print(image_reshape, label)

        # Batch the samples
        image_batch, label_batch = tf.train.batch([image_reshape, label], batch_size=10, num_threads=1, capacity=10)

        return image_batch, label_batch


if __name__ == "__main__":
    # 1. Collect the binary files: directory + file name -> list of paths
    file_names = os.listdir(FLAGS.cifar_dir)

    filelist = [os.path.join(FLAGS.cifar_dir, name) for name in file_names if name.endswith("bin")]

    # print(file_names)
    cf = CifarRead(filelist)

    # To read from the binary files instead (e.g. before writing TFRecords):
    # image_batch, label_batch = cf.read_and_decode()

    image_batch, label_batch = cf.read_from_tfrecords()

    # Open a session and run the pipeline
    with tf.Session() as sess:
        # Coordinator for the reader threads
        coord = tf.train.Coordinator()

        # Start the threads that fill the input queues
        threads = tf.train.start_queue_runners(sess, coord=coord)

        try:
            # To store the batch in the TFRecords file:
            # print("start saving")
            #
            # cf.write_ro_tfrecords(image_batch, label_batch)
            #
            # print("finished saving")

            # Print what was read
            print(sess.run([image_batch, label_batch]))
        finally:
            # Stop and reclaim the reader threads even if the run above fails
            coord.request_stop()

            coord.join(threads)

tensorflow文字讀取---My way of AI17

流程 tensorflow可以讀取文字檔案，二進位制檔案，圖片檔案和tfrecords檔案。除了閱讀器不一樣，讀檔案的流程是差不多的。 1.構造檔案佇列 2.讀取檔案內容 3.解碼 4.批處理 import tensorflow as tf import os

驗證碼識別1---My way of AI 23

寫一個練手的驗證碼識別專案資料集和完整程式碼我會傳到我的下載資源這篇文章是專案第一步，建立tfrecords檔案 # 程式碼邏輯 1.讀取圖片檔案 2.讀取csv檔案 3.處理一下讀取好的csv檔案到數字張量 4.寫入tfrecords檔案 1.讀取圖片檔案 1.建立

資料集-My way of ML5

資料集劃分一般是0.75劃分成訓練集，0.25劃分成測試集資料集劃分API：sklearn.model_selection.train_test_split(test_size=0.25) sklearn流行資料集 sklearn.databases.l

樸素貝葉斯演算法-My way of ML7

預備知識聯合概率：包含多個條件，所有條件同時成立概率P（A，B）=P（A）P（B）條件概率：事件A發生在事件B發生的條件之下的概率。所有的特徵值無關的時候才能適用條件概率樸素貝葉斯的前提是：特徵條件獨立，哈哈，這也是她被叫做樸素的原因，因為特徵之間很難獨

拉普拉斯平滑-My way of ML7.1

樸素貝葉斯演算法有一個問題：某篇文章是娛樂的概率是0這是不合理的，如果詞頻列表中出現很多次數都是0的話，很可能計算結果都是 0. 解決辦法：拉普拉斯平滑 p（Fi|C）=(Ni+alpha)/(N+alpha*m) alpha是指定的係數一般是1，m是訓練文件

初識神經網路---My way of AI18

感知機說到神經網路之前我們先說一下感知機，它其實是一種偽神經元。有n個數據，通過權重與各種資料之間的計算和比較啟用函式結果，得出輸出，它的主要目的是解決分類問題。但是如何現在是有一個感知機解決不了的我們就引入了多個感知機，這其實也就是我們常說的svm演算法

MNIST手寫數字識別---My way of AI 19

手寫數字的識別相當於是深度學習的helloworld 首先這是一個流行資料集可以從網路下載，也可以直接import from tensorflow.examples.tutorials.mnist import input_data 學這個簡單的神經網路之前我

卷積神經網路---My way of AI 21

卷積神經網路上一篇文章實現了用全連線層神經網路手寫數字識別，但是全連線層神經網路有一個缺點就是：假設我是32*32*3的RGB圖片，如果我的第一層全連線層是500個節點，那麼我需要的引數就是32*32*3*500+500一共需要150多萬個引數，這樣會導致計算過慢以及

My year of 2017

德國時光躲避足球生命曾經回憶證明繼續有一個姓羅的胖子，他說他有一個要堅持20年計劃，第一年我真的不覺得什麽，好比每天晚上都要刷牙每天早上都要吃早飯一樣簡單。實際幾年走下來之後，發現能堅持下來真不是一件容易的事情，生活中總會有各種各樣的事情發生，可能這一年之

the Way of Python Day 2

sim similar sha maximum per AD lar pick may 　　today,i got lots of knowledge of python ,like how to get the maximum value of three numbers

The way of Webpack learning (I.) -- Configure Webpack from zero（從零開始配置webpack）

-- UNC 初始 exp light 方法 name npm .html 學習之路基於webpack3.10.0，webpack4.0之後更新。一：開始前的配置 1、初始化項目，其實就是新建一個package.json文件，後面的命令依賴裏面的配置項。 npm ini

my questions of C#

style 接口只讀而已 pan 外部類繼承接口類方式語法關於類屬性的訪問權限對於Python來說，並不存在私有屬性——雖然提供了一個雙下劃線的“人為定義”，這使得你在直接訪問時獲取到Exception，但

使用Tensorflow來讀取訓練自己的資料（三）

本文詳解training.py是如何編寫的。 import os import numpy as np import tensorflow as tf import input_data import model N_CLASSES = 2 # 二分類問題，只有是還是否，即0，1 IMG_W

使用Tensorflow來讀取訓練自己的資料（二）

接上一篇，繼續分析，model.py，也就是模型的構建。兩個卷積層，兩個池化層，以及後面的全連線層怎麼通過tensorflow定義的。 import tensorflow as tf def inference(images, batch_size, n_classess): # c

使用Tensorflow來讀取訓練自己的資料（一）

本文的程式碼以及思路都是參考別人的，現在只是整理一下思路，做一些解釋，畢竟是小白。首先本文所使用的圖片資料都是https://www.kaggle.com/下載的，使用的是貓和狗的圖片集，https://www.kaggle.com/c/dogs-vs-cats-redux-ker

tensorflow中讀取模型中儲存的值, tf.train.NewCheckpointReader

使用tf.trian.NewCheckpointReader(model_dir) 一個標準的模型檔案有一下檔案, model_dir就是MyModel(沒有後綴) checkpoint Model.meta Model.data-00000-of-00001 Model.index

C++文字讀取和寫入

#include <fstream> ofstream //檔案寫操作記憶體寫入儲存裝置

tensorflow error : Assign requires shapes of both tensors to match.

Assign requires shapes of both tensors to match. lhs shape= [256，4096] rhs shape= [8192，4096] 訓練和測試中的網路引數不同導致的錯誤。但我使用的是同一個model檔案，最後發現256和8192之間正

tensorflow 批次讀取檔案內的資料，並將順序隨機化處理. --[python]

使用tensorflow批次的讀取預處理之後的文字資料，並將其分為一個迭代器批次：比如此刻，我有一個處理之後的資料包： data.csv shape =(8,10)，其中這個結構中，前五個列為feature , 後五列為label 1,2,3,4,5,6,7,8,9,10 11,12

tensorflow 批次讀取文件內的數據，並將順序隨機化處理. --[python]

constant field ads run clas rom nump bsp 行數據使用tensorflow批次的讀取預處理之後的文本數據，並將其分為一個叠代器批次：比如此刻，我有一個處理之後的數據包： data.csv shape =(8,10)，其中這個結構

tensorflow文字讀取---My way of AI17

流程

相關推薦