caffe 教程 Fine-tuning a Pretrained Network for Style Recognition下載資料
阿新 • • 發佈:2019-01-08
問題:執行python examples/finetune_flickr_style/assemble_data.py --workers=1 --images=2000 --seed 831486
命令下載Flickr Style資料,然而提示:Writing train/val for 0 successfully downloaded images.
檢視caffe/data/flickr_style/images目錄,發現並沒有下載到任何資料集。
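原因:1)原來的程式碼使用多執行緒、多程序;2)Python2 和Python3的語法不相容(例如Python3須使用urllib.request.urlretrieve,而Python2使用urllib.urlretrieve)。由於下載函式會捕捉所有例外並回傳False,這些錯誤不會顯示出來,只會表現為圖片下載失敗。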
解決:修改caffe/examples/finetune_flickr_style/assemble_data.py 為如下:
"""
Form a subset of the Flickr Style data, download images to dirname, and write
Caffe ImageDataLayer training file.
"""
import os
import urllib.request #修改,Python3使用import urllib.request,Python2使用import urllib
import hashlib
import argparse
import numpy as np
import pandas as pd
from skimage import io
import multiprocessing
import socket
# Flickr returns a special placeholder image if the request is unavailable.
# Downloads whose SHA-1 digest equals this value are treated as failures.
MISSING_IMAGE_SHA1 = '6a92790b1c2a301c6e7ddef645dca1f53ea97ac2'
example_dirname = os.path.abspath(os.path.dirname(__file__))
caffe_dirname = os.path.abspath(os.path.join(example_dirname, '../..'))
training_dirname = os.path.join(caffe_dirname, 'data/flickr_style')


# Modified: the original download_image function was replaced by
# mydownload_image (Python 3 uses urllib.request, Python 2 used urllib).
def mydownload_image(args_tuple):
    """Download one image and report whether a usable file is on disk.

    Args:
        args_tuple: a ``(url, filename)`` pair. The image at ``url`` is
            fetched only when ``filename`` does not exist yet.

    Returns:
        True when ``filename`` exists after the call and is not Flickr's
        "image unavailable" placeholder; False on any download, I/O or
        argument error.

    Raises:
        Exception: when interrupted with Ctrl-C (KeyboardInterrupt is
            converted to a plain Exception, as in the original code).
    """
    partial = None
    try:
        url, filename = args_tuple
        if not os.path.exists(filename):
            # Fetch into a side file first so that a timeout or broken
            # connection never leaves a truncated image under the final
            # name, where a later run would mistake it for a good download.
            partial = '{}.part'.format(filename)
            urllib.request.urlretrieve(url, partial)
            os.replace(partial, filename)
        # Binary mode: image bytes are not valid text in Python 3.
        with open(filename, 'rb') as image_file:
            digest = hashlib.sha1(image_file.read()).hexdigest()
        return digest != MISSING_IMAGE_SHA1
    except KeyboardInterrupt:
        raise Exception('Download interrupted by user.')
    except Exception:
        return False
    finally:
        # Drop whatever is left of an unfinished download.
        if partial is not None and os.path.exists(partial):
            os.remove(partial)
def main():
    """Download a subset of Flickr Style and write the train/test lists.

    Reads ``flickr_style.csv.gz`` next to this script, shuffles it with the
    given seed, optionally restricts it by label and image count, downloads
    every selected image into ``<training_dirname>/images`` and writes
    ``train.txt`` / ``test.txt`` (``<image path> <label>`` per line) for the
    images that downloaded successfully.
    """
    parser = argparse.ArgumentParser(
        description='Download a subset of Flickr Style to a directory')
    parser.add_argument(
        '-s', '--seed', type=int, default=0,
        help="random seed")
    parser.add_argument(
        '-i', '--images', type=int, default=-1,
        help="number of images to use (-1 for all [default])",
    )
    parser.add_argument(
        '-w', '--workers', type=int, default=-1,
        help="num workers used to download images. -x uses (all - x) cores [-1 default]."
    )
    parser.add_argument(
        '-l', '--labels', type=int, default=0,
        help="if set to a positive value, only sample images from the first number of labels."
    )
    args = parser.parse_args()
    np.random.seed(args.seed)

    # Read data, shuffle order, and subsample.
    csv_filename = os.path.join(example_dirname, 'flickr_style.csv.gz')
    df = pd.read_csv(csv_filename, index_col=0, compression='gzip')
    df = df.iloc[np.random.permutation(df.shape[0])]
    if args.labels > 0:
        df = df.loc[df['label'] < args.labels]
    if 0 < args.images < df.shape[0]:
        df = df.iloc[:args.images]

    # Make directory for images and get local filenames.
    images_dirname = os.path.join(training_dirname, 'images')
    os.makedirs(images_dirname, exist_ok=True)
    # assign() returns a new frame, so no column is written into a slice.
    df = df.assign(image_filename=[
        os.path.join(images_dirname, url.split('/')[-1])
        for url in df['image_url']
    ])

    # Download images.
    num_workers = args.workers
    if num_workers <= 0:
        num_workers = multiprocessing.cpu_count() + num_workers
    print('Downloading {} images with {} workers...'.format(
        df.shape[0], num_workers))

    # NOTE: the multiprocessing pool of the original script is disabled;
    # images are fetched one at a time and the worker count above is only
    # reported, not used.
    socket.setdefaulttimeout(6)  # give up on a stalled connection after 6 s
    results = []
    for item in zip(df['image_url'], df['image_filename']):
        succeeded = mydownload_image(item)
        results.append(succeeded)
        print('1' if succeeded else 'False')

    # Only keep rows with valid images, and write out training file lists.
    print(len(results))
    # An explicit boolean array keeps this a row filter even when there are
    # no results; a bare empty list would select zero columns instead.
    df = df.loc[np.asarray(results, dtype=bool)]
    for split in ['train', 'test']:
        split_df = df[df['_split'] == split]
        filename = os.path.join(training_dirname, '{}.txt'.format(split))
        split_df[['image_filename', 'label']].to_csv(
            filename, sep=' ', header=False, index=False)
    print('Writing train/val for {} successfully downloaded images.'.format(
        df.shape[0]))


if __name__ == '__main__':
    main()
執行python examples/finetune_flickr_style/assemble_data.py --workers=1 --images=2000 --seed 831486
命令成功下載Flickr Style資料,檢視caffe/data/flickr_style/images目錄,可以看到資料集已下載完成。