天池雪浪製造AI挑戰賽（初賽）

阿新 • • 發佈：2018-12-06

第一次參加比賽，記錄一下。我直接使用遷移學習進行分類，採用的模型是 vgg16。

排名不高僅供參考

import pandas as pd
import torch
import numpy as np
from torch.autograd import Variable
import torchvision
from torchvision import transforms, models
import matplotlib.pyplot as plt
import torch.nn.functional as F 
import os
from sklearn import metrics
import sys


# The data directory differs between the author's two machines: on Windows
# the script has to step into ./input first.
system = sys.platform
if system == 'win32':
    os.chdir('input')

# 'train' runs the training loop; 'test' writes the submission CSV.
mode = 'train'
# mode = 'test'

print('mode = ' + mode)

# Preprocessing applied to every image loaded through ImageFolder.
# Augmentations that were tried and left disabled:
#   transforms.CenterCrop(200)
#   transforms.RandomVerticalFlip()
#   transforms.RandomHorizontalFlip()
transformer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

# One ImageFolder dataset per split; the folder name is the split name.
train_data = {
    'train': torchvision.datasets.ImageFolder('train', transform=transformer),
    'val': torchvision.datasets.ImageFolder('val', transform=transformer),
}

print(train_data['train'].class_to_idx)

# Both splits use the same loader settings (batches of 10, shuffled).
train_loader = {
    name: torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True)
    for name, dataset in train_data.items()
}

print('train num is ' + str(len(train_data['train'])))
print('val num is ' + str(len(train_data['val'])))

# Make sure the checkpoint directory exists: the check below must not crash
# on a fresh checkout, and the training loop saves to models/my_model.pkl.
os.makedirs('models', exist_ok=True)

if os.path.isfile('models/my_model.pkl'):  # resume from the saved checkpoint
    print('restrore the model')
    # Load from the same path the training loop saves to.
    # NOTE: torch.load unpickles arbitrary objects; only load trusted files.
    model = torch.load('models/my_model.pkl')
else:
    print('use vgg16 model')

    # The author pre-downloaded the weights because of a slow connection:
    #   model = torch.load('vgg16.pkl')
    #   model = torch.load('vgg_11_bn.pkl')
    # NOTE(review): the published listing never assigns `model` in this
    # branch and is cut off inside the classifier definition. Loading the
    # torchvision VGG16 weights and using a single Linear head is the
    # minimal reconstruction -- confirm against the author's real script.
    model = models.vgg16(pretrained=True)

    # Keep the VGG feature extractor, replace the classifier with a
    # two-class head.
    model.classifier = torch.nn.Sequential(
        torch.nn.Linear(7 * 7 * 512, 2),
    )

if torch.cuda.is_available():  # move to GPU when one is present
    model = model.cuda()
print(model)


# Optimisation setup: cross-entropy loss, Adam over every model parameter.
lr = 1e-5
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

epochs = 30
best_auc = 0

# Histories kept mostly for plotting.
plot_loss = []
auc_list, auc_list2 = [], []
train_acc_list, test_acc_list = [], []
# plt.ion()

def valling(dir_name, model, max_batches=11):
    """Run ``model`` over one data split and collect outputs for metrics.

    Args:
        dir_name: Key into the module-level ``train_loader`` dict
            (``'train'`` or ``'val'``).
        model: Network to evaluate. It is switched to eval mode and is not
            switched back here.
        max_batches: Evaluate at most this many batches; ``None`` evaluates
            the whole loader. The default of 11 matches the original
            ``tep_idx <= 10`` cap.

    Returns:
        Tuple ``(y_pre_all, test_y_all, all_pro)`` of 1-D float64 arrays:
        predicted class indices (for accuracy), ground-truth labels, and the
        softmax probability of class 1 (for AUC).
    """
    model.eval()
    print('valling in ' + str(dir_name))

    # Seed each list with an empty float64 array so the result keeps the
    # dtype the np.append-based version produced and an empty loader still
    # returns empty arrays.
    pred_chunks = [np.array(())]
    label_chunks = [np.array(())]
    prob_chunks = [np.array(())]

    with torch.no_grad():  # inference only; do not build autograd graphs
        for tep_idx, (test_x, test_y) in enumerate(train_loader[dir_name]):
            if max_batches is not None and tep_idx >= max_batches:
                # Stop instead of loading the rest of the dataset for nothing.
                break
            if torch.cuda.is_available():
                test_x, test_y = test_x.cuda(), test_y.cuda()

            y_out_test = model(test_x)

            # Softmax over the class dimension (dim=1), not over the batch.
            prob_chunks.append(F.softmax(y_out_test, dim=1)[:, 1].cpu().numpy())
            pred_chunks.append(torch.argmax(y_out_test, 1).cpu().numpy())
            label_chunks.append(test_y.cpu().numpy())

    y_pre_all = np.concatenate(pred_chunks)
    test_y_all = np.concatenate(label_chunks)
    all_pro = np.concatenate(prob_chunks)
    return y_pre_all, test_y_all, all_pro


def my_metrics(pre, label, pro):
    """Compute ROC AUC and accuracy for one evaluation pass.

    Args:
        pre: Array of predicted class indices.
        label: Array of ground-truth labels, same length as ``pre``.
        pro: Array of scores passed to ``roc_auc_score`` together with
            ``label``.

    Returns:
        Tuple ``(auc, test_acc)``.
    """
    score = metrics.roc_auc_score(label, pro)
    # Accuracy = number of matching predictions over the number of predictions.
    hits = np.sum(pre == label)
    return score, hits / pre.size


def plot_list(list1, list2, dir_, title):
    """Plot a train curve and a test curve on one figure and save it.

    Args:
        list1: Values drawn with the label ``'train'``.
        list2: Values drawn with the label ``'test'``.
        dir_: Output image path; missing parent directories are created.
        title: Figure title.
    """
    target_dir = os.path.dirname(os.path.abspath(dir_))
    if not os.path.isdir(target_dir):
        # makedirs (not mkdir): the path is nested, e.g.
        # saved_figs/<run>/acc.png, and the parent may not exist either.
        os.makedirs(target_dir, exist_ok=True)
        print('creat dir{}'.format(target_dir))
    plt.figure()
    plt.plot(list1, label='train')
    plt.plot(list2, label='test')
    plt.title(title)
    plt.legend(loc='best')
    plt.savefig(dir_)
    plt.close()

if mode == 'train':
    # ---- Training: fit the model, track metrics, keep the best checkpoint.
    best_acc = 0
    plot_epoch_loss = []
    saved_figs_dir = 'vgg11_full_32'
    fig_dir = os.path.join('saved_figs', saved_figs_dir)
    # Create output directories up front so savefig / torch.save cannot fail
    # on a fresh checkout.
    os.makedirs(fig_dir, exist_ok=True)
    os.makedirs('models', exist_ok=True)

    for epoch in range(epochs):
        model.train()  # valling() leaves the model in eval mode
        print('training')
        batch = 0
        epoch_loss = 0.0
        for x, y in train_loader['train']:
            batch += 1
            if torch.cuda.is_available():
                x, y = x.cuda(), y.cuda()

            optimizer.zero_grad()
            y_out = model(x)
            loss = loss_func(y_out, y)
            loss.backward()
            optimizer.step()

            # .item() detaches the value; accumulating the tensor itself
            # would keep every batch's autograd graph alive for the epoch.
            a_loss = loss.item()
            epoch_loss += a_loss
            plot_loss.append(a_loss)
            print(a_loss)

            # Refresh the running per-batch loss curve on disk.
            plt.figure()
            plt.plot(plot_loss)
            plt.text(0, 0.5, 'loss = %.3f' % a_loss, {'color': 'red', 'size': 15})
            plt.savefig('loss2.png')
            plt.close()

        y_pre_all, test_y_all, all_pro = valling('val', model)
        train_y_pre_all, train_test_y_all, train_all_pro = valling('train', model)

        auc, test_acc = my_metrics(y_pre_all, test_y_all, all_pro)
        train_auc, train_test_acc = my_metrics(train_y_pre_all, train_test_y_all, train_all_pro)

        train_acc_list.append(train_test_acc)
        test_acc_list.append(test_acc)
        plot_list(train_acc_list, test_acc_list, os.path.join(fig_dir, 'acc.png'), 'acc_curve')

        auc_list.append(auc)
        auc_list2.append(train_auc)
        plot_list(auc_list2, auc_list, os.path.join(fig_dir, 'auc.png'), 'auc_curve')

        # Decide whether this epoch is at least as good as the best so far
        # BEFORE updating the running best; otherwise the comparison is
        # always true and the checkpoint is overwritten every epoch.
        improved = test_acc >= best_acc
        # improved = auc >= best_auc  # alternative: keep the best-AUC model
        best_acc = max(best_acc, test_acc)
        best_auc = max(best_auc, auc)

        print('test_acc = ' + str(test_acc * 100)[:4] + '%')
        print('train_acc = ' + str(train_test_acc * 100)[:4] + '%')
        mean_loss = epoch_loss / max(batch, 1)  # guard against an empty loader
        print('This ' + str(epoch) + 'th epoch', 'epoch average loss = ' + str(mean_loss))
        plot_epoch_loss.append(mean_loss)
        plt.figure()
        plt.plot(plot_epoch_loss)
        plt.title('epoch_loss')
        plt.savefig(os.path.join(fig_dir, 'epoch_loss.png'))
        plt.close()  # release the figure instead of leaking one per epoch
        print('lr = {}'.format(lr))
        if improved:
            print('score is better  store model')
            torch.save(model, 'models/my_model.pkl')
        else:
            print("not good don't save")
        print('-' * 40)


else:
    # ---- Inference: score the test folder and write the submission CSV.
    test_data = torchvision.datasets.ImageFolder('test', transform=transformer)
    test_data_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=10,
        shuffle=False)  # keep loader order aligned with test_data.imgs

    filenames = [os.path.basename(path) for path, _ in test_data.imgs]

    model.eval()  # inference behaviour for dropout / batch-norm layers
    prob_chunks = []
    with torch.no_grad():
        for i, (x, _) in enumerate(test_data_loader):
            if torch.cuda.is_available():
                x = x.cuda()

            pre_out = model(x)
            # Probability of class 1, softmax taken over the class dimension.
            pro = F.softmax(pre_out, dim=1)[:, 1].cpu().numpy().astype(np.float64)
            # Keep probabilities strictly inside (0, 1).
            prob_chunks.append(np.clip(pro, 0.000001, 0.999999))
            print('The ' + str(i*10) + ' th ' + 'row')

    probabilities = np.concatenate(prob_chunks) if prob_chunks else np.array(())

    # Build the frame in one go: the column gets a float dtype (so round()
    # really applies) and a filename/probability length mismatch raises
    # instead of being hidden by a blanket except.
    ret_df = pd.DataFrame(
        {'filename': filenames, 'probability': probabilities},
        columns=['filename', 'probability'])

    ret_df = ret_df.round(6)
    # Sanity check: both counts should be 0 after clipping.
    print((ret_df['probability'] <= 0).sum())
    print((ret_df['probability'] >= 1).sum())
    os.makedirs('outputs', exist_ok=True)
    ret_df.to_csv('outputs/submission.csv', index=False, encoding='utf-8')

新人學習中

天池雪浪製造AI挑戰賽（初賽）

第一次參加比賽，記錄一下，我是直接使用遷移學習進行分類採用vgg16 排名不高僅供參考 import pandas as pd import torch import numpy as np from torch.autograd import Variable import tor

Python抓取新浪新聞數據（二）

Python抓取新浪新聞數據以下是抓取的完整代碼(抓取了網頁的title,newssource,dt,article,editor,comments)舉例：Python抓取新浪新聞數據（二）

Python抓取新浪新聞數據（三）

Python抓取新浪新聞數據非同步載入一般在XHR下查找，但是沒有發現XHR下有相關內容。 Python抓取新浪新聞數據（三）

201771010120 蘇浪浪面向物件程式設計（Java）第10周

1、實驗目的與要求 (1) 理解泛型概念； (2) 掌握泛型類的定義與使用； (3) 掌握泛型方法的宣告與使用； (4) 掌握泛型介面的定義與實現； (5)瞭解泛型程式設計，理解其用途。 2、實驗內容和步驟實驗1：匯入第8章示例程式，測試程式並進行程式碼註釋。測試程式1：

搭建AI機器人（筆記）

公眾號申請註冊一個公眾號首先進入微信公眾號平臺，然後註冊一個賬號，型別選擇訂閱號，然後根據相應的提示完成資訊的填寫和驗證。具體操作可參考以下視訊。後臺伺服器的搭建安裝 NodeJS 首先執行以下命令 sudo su 下載最新的穩定版 v6.10.3 到本地 wgethtt

AI探索（二）Tensorflow環境準備

Python + Tensorflow環境安裝 Tensorflow支援Windows/Mac/Linux等三種作業系統，其中windows下python需要安裝3.5以上的版本 Mac/Linux自帶的python 2.7可以支援安裝Tensorflow #1. Python安裝

華為AI隨筆（7）

>>> x=[1,2,3] >>> y=[11,22,33] >>> import copy >>> z=x >>&g

AI探索（三）Tensorflow程式設計模型

Tensorflow程式設計模型。。。。後續完善 import os os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' import numpy as np num_points = 1000 data_array = [] for i in x

華為雲AI隨筆（15）

remove刪除元素如上。 del如下。pop彈出元素並返回。 print(x) [1, 2, 5, 6, 8, 4, 3, 5] del x[3] print(x) [1, 2, 5, 8, 4,

AI探索（四）NumPy庫的使用

NumPy(Numerical Python) 是 Python 語言的一個擴充套件程式庫，支援大量的維度陣列與矩陣運算，此外也針對陣列運算提供大量的數學函式庫。 NumPy 是一個執行速度非常快的數學庫，主要用於陣列計算，包含：一個強大的N維陣列物件 ndarray 廣播功能函式整合

UE4的AI學習（2）——官方案例例項分析

http://blog.csdn.net/a393134314/article/details/52301309 AI學習當中，不學習行為樹基本概念就不能明白具體例項中的操作意義，但是沒有經過具體例項實驗，又覺得基本概念抽象難以理解。建議先泛讀（1）（2）後再對

Dota2 AI 開發（二）定製AI陣容配置英雄出裝

在Dota2 AI 開發（一）環境配置中介紹瞭如何搭建 Dota2 AI 的開發環境，在這篇文章中，主要介紹Dota2中AI的常規控制方式，並介紹如何在人機比賽中配置一個裸跳刀的Sven。常用指令重新載入Lua指令碼：dota_bot_reload_scripts加速遊戲：

象棋AI演算法（一）

一，最小-最大搜索Minimax Search 首先：最小與最大是相對的，且只針對一方，AI中即為有利於AI 象棋AI中的最小最大搜索：簡單來講就是該AI走了，窮舉這個過程中對於AI來說的最佳（最大）走法對於我來說最差（最小）的走法。而這個走法就是我們所要找的AI的最佳走法。這個過程就跟你與別人下象棋

國際象棋AI設計（一）

環境配置測試使用說明使用命令列執行 py start.py執行在命令列介面，列印字元棋盤對弈開始時， AI 接受一個輸入（w/b），指示AI執白手或黑手著子的輸入輸出依照修正後的標準代數記譜法SAN,如Nh3,表示馬（Knight）

國際象棋AI設計（二）----搜尋

Alpha-Beta剪枝 Alpha-Beta 同“MinMax”非常相似，區別主要在於 MinMax 執行時要檢查整個博弈樹，然後儘可能選擇最好的線路。 Alpha-Beta 則是在MinMax的基礎上把一些不必要的分支剪去，加快搜索速度。 Alph

新浪前端面試題（二）

兩道演算法題: 一、給出一個演算法，輸入為十六進位制或十進位制的字串，輸出為對應的十進位制或十六進位制的字串。例 '100'=>'aa', 'ff' =>'255'; //這道題給的不是很好，首先題目中沒有給出16進位制的特徵，造成無法判別16進位制還是10進位

看雪學院-OllyDBG入門系列（四）記憶體寫入筆記

看雪學院-OllyDBG入門系列（四）記憶體斷點筆記作者：CCDebuger 在 OllyDBG 中一般我們用到的記憶體斷點有記憶體訪問和記憶體寫入斷點。記憶體訪問斷點就是指程式訪問記憶體中我

【學習筆記4】Convolutional Pose Mashines在FashionAI中的應用【第二彈】——我的深度學習首秀（天池FashionAI關鍵點挑戰賽複賽篇）

又持續煉了一個月左右的丹，今天複賽B階段結束了，老衲總算熬到頭了。和一群“仙人”競賽，既壓力山大，又動力滿滿。壓力山大是前排“大仙”令人驚歎的NE值以及Top20的誘人獎勵，畢竟自己曾經那麼接近Top20；動力滿滿是自己一直是抱著學習的態度把這次比賽當做一個實踐專案在做，能得

計蒜之道初賽第一場-阿里天池的新任務（簡單）

阿里“天池”競賽平臺近日推出了一個新的挑戰任務：對於給定的一串 DNA 鹼基序列 tt，判斷它在另一個根據規則生成的 DNA 鹼基序列 ss 中出現了多少次。首先，定義一個序列 ww：

雪飲者決策樹系列（二）決策樹應用

ssi 字符串長度 mes pla 選擇 font com vector nac 　　本篇以信息增益最大作為最優化策略來詳細介紹決策樹的決策流程。　　首先給定數據集，見下圖　　註：本數據來源於網絡本篇將以這些數據作為訓練數據（雖然少，但足以介紹清楚原理！），下圖是決

天池雪浪製造AI挑戰賽（初賽）

相關推薦