sklearn kMeans 分類實戰，對滬深300的每日漲跌進行分類

阿新 • • 發佈：2018-12-12

# ohlc_clustering.py

import copy
import datetime
import pymysql

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# from matplotlib.finance import candlestick_ohlc
import matplotlib.dates as mdates
from matplotlib.dates import (
    DateFormatter, WeekdayLocator, DayLocator, MONDAY
) 

import mpl_finance as mpf
import numpy as np
import pandas as pd
import pandas_datareader.data as web
from sklearn.cluster import KMeans

def get_open_normalised_prices():
    """
    Build a DataFrame of open-normalised prices for index code 399300.

    Daily bars dated on or after 2004-06-01 are read from the
    `tmp_stock` table of the local MySQL `blog` database (not from
    Yahoo Finance) and reduced to three ratio columns:

    * ``H/O`` - High / Open
    * ``L/O`` - Low / Open
    * ``C/O`` - Close / Open (Close is the table's ``adj_close``)

    Returns
    -------
    pandas.DataFrame
        One row per bar, in ascending date order, holding only the
        three ratio columns listed above.
    """
    # NOTE(review): host and credentials are hard-coded; consider moving
    # them to configuration before sharing this script.
    connect = pymysql.connect(
        host='127.0.0.1',
        db='blog',
        user='root',
        passwd='123456',
        charset='utf8',
        use_unicode=True
    )
    select_sql_300 = "select date as Date,open as Open,high as High,low as Low,adj_close as Close from `tmp_stock` where code ='399300' and date >= '2004-6-01'  order by date asc"

    try:
        df = pd.read_sql(select_sql_300, con=connect)
    finally:
        # The connection is only needed for this single query; release
        # it even when the query raises.
        connect.close()

    # Express high, low and close relative to the same day's open
    df["H/O"] = df["High"] / df["Open"]
    df["L/O"] = df["Low"] / df["Open"]
    df["C/O"] = df["Close"] / df["Open"]

    # Keep only the ratio columns: they are the clustering features
    return df.drop(["Open", "High", "Low", "Close", "Date"], axis=1)

def plot_candlesticks(data):
    """
    Plot a candlestick chart of the prices against their dates.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide 'Date', 'Open', 'High', 'Low' and 'Close' columns.
        'Date' may hold anything ``pandas.to_datetime`` can parse
        (timestamps, ``datetime.date`` objects, ISO strings).
        The caller's frame is not modified.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Work on a copy so the helper column added below never leaks
    # into the caller's frame
    df = data.copy()
    df.reset_index(inplace=True)

    # Convert the dates to matplotlib date numbers for the x axis.
    # Going through to_datetime first means the conversion does not
    # depend on the column already holding pandas Timestamps.
    df['date_fmt'] = pd.to_datetime(df['Date']).apply(
        lambda date: mdates.date2num(date.to_pydatetime())
    )

    fig, ax = plt.subplots(figsize=(16, 4))
    fig.subplots_adjust(bottom=0.2)

    # Plot the candlestick OHLC chart using red for
    # up days and green for down days
    mpf.candlestick_ohlc(
        ax, df[
            ['date_fmt', 'Open', 'High', 'Low', 'Close']
        ].values, width=0.6,
        colorup='r', colordown='green'
    )
    # Render the numeric x values as calendar dates
    ax.xaxis_date()
    plt.show()


def plot_cluster(data):
    """
    Scatter the closing price over time, coloured by cluster label.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide 'Date', 'Close' and 'Cluster' columns. 'Date' may
        hold anything ``pandas.to_datetime`` can parse. The caller's
        frame is not modified.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Work on a copy so the helper column added below never leaks
    # into the caller's frame
    df = data.copy()
    df.reset_index(inplace=True)

    # Convert the dates to matplotlib date numbers for the x axis;
    # to_datetime makes this independent of how 'Date' is stored.
    df['date_fmt'] = pd.to_datetime(df['Date']).apply(
        lambda date: mdates.date2num(date.to_pydatetime())
    )

    fig, ax = plt.subplots(figsize=(16, 4))
    fig.subplots_adjust(bottom=0.2)

    # (cluster label, colour, legend text), drawn in this order.
    # NOTE(review): the legend texts are hard-coded per label number;
    # they presumably describe the clusters found on the author's
    # data set - confirm against the fitted model before relying on
    # them, and extend the table if more than four clusters are used.
    cluster_styles = (
        (0, 'y', "Small Rise"),
        (1, 'g', "Big Down"),
        (2, 'r', "Big Rise"),
        (3, 'b', "Small Down"),
    )

    size = 1.2
    for cluster_id, colour, legend_text in cluster_styles:
        members = df.loc[df["Cluster"] == cluster_id]
        ax.scatter(
            members['date_fmt'], members['Close'],
            s=size, c=colour, marker='o', label=legend_text
        )

    # Render the numeric x values as calendar dates
    ax.xaxis_date()
    plt.xlabel('Date')
    plt.ylabel('Close')
    plt.legend(loc='upper right')
    plt.show()

def plot_3d_normalised_candles(data):
    """
    Plot a 3D scatterchart of the open-normalised bars
    highlighting the separate clusters by colour.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the "H/O", "L/O" and "C/O" columns. Point colours
        are taken from its "Cluster" column when present.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    if "Cluster" in data:
        # Use the labels that travel with the frame so the colours
        # always line up with the rows being plotted
        cluster_ids = data["Cluster"]
    else:
        # Backward compatibility: fall back to the module-level
        # `labels` array for frames without a "Cluster" column
        cluster_ids = labels

    fig = plt.figure(figsize=(12, 9))
    # Create the 3D axes through the figure so they are attached to it;
    # a bare Axes3D(fig) is not added automatically by recent matplotlib.
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(elev=21, azim=-136)
    ax.scatter(
        data["H/O"], data["L/O"], data["C/O"],
        # The builtin float replaces the removed np.float alias
        c=np.asarray(cluster_ids, dtype=float)
    )
    ax.set_xlabel('High/Open')
    ax.set_ylabel('Low/Open')
    ax.set_zlabel('Close/Open')
    plt.show()

def plot_cluster_ordered_candles(data):
    """
    Draw the candles grouped by cluster label instead of by date.

    The rows of `data` ('Open', 'High', 'Low', 'Close', 'Cluster') are
    sorted by cluster and plotted against their position in that
    ordering. A dashed blue vertical line is drawn at every position
    where the cluster label differs from the previous row's.
    The caller's frame is not modified.
    """
    fig, ax = plt.subplots(figsize=(16,4))

    # Monday major ticks, daily minor ticks, and an empty label format
    ax.xaxis.set_major_locator(WeekdayLocator(MONDAY))
    ax.xaxis.set_minor_locator(DayLocator())
    ax.xaxis.set_major_formatter(DateFormatter(""))

    # Order a private copy by cluster; the fresh 0..n-1 index doubles
    # as the x coordinate of each candle
    ordered = copy.deepcopy(data).sort_values(by="Cluster").reset_index()
    ordered["clust_index"] = ordered.index
    ordered["clust_change"] = ordered["Cluster"].diff()

    # Positions where the label changes. The first row's diff is NaN,
    # which also compares unequal to 0, so position 0 is included.
    boundaries = ordered.loc[ordered["clust_change"] != 0, "clust_index"]

    # Black candles for up days, red for down days
    ohlc_columns = ["clust_index", 'Open', 'High', 'Low', 'Close']
    mpf.candlestick_ohlc(
        ax, ordered[ohlc_columns].values,
        width=0.6, colorup='#000000', colordown='#ff0000'
    )

    # One dashed blue line per cluster boundary
    for position in boundaries:
        plt.axvline(position, linestyle="dashed", c="blue")

    plt.xlim(0, len(ordered))
    tick_labels = plt.gca().get_xticklabels()
    plt.setp(tick_labels, rotation=45, horizontalalignment='right')
    plt.show()

def create_follow_cluster_matrix(data, k=None):
    """
    Print and return the k x k cluster follow-on matrix.

    Entry (i, j) is the percentage of rows whose cluster is i and
    whose following row has cluster j, so the whole matrix sums to
    100 when there is at least one such pair.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs an integer "Cluster" column; consecutive rows are
        treated as consecutive days. The frame is modified in place:
        "ClusterTomorrow" and "ClusterMatrix" columns are added and
        every row containing a NaN is dropped (this includes the last
        row, which has no following row).
    k : int, optional
        Number of clusters. When omitted it is inferred from the data
        as the largest label seen plus one.

    Returns
    -------
    numpy.ndarray
        The (k, k) matrix of percentages that was printed.
    """
    data["ClusterTomorrow"] = data["Cluster"].shift(-1)
    data.dropna(inplace=True)
    data["ClusterTomorrow"] = data["ClusterTomorrow"].astype(int)
    # Store the (today, tomorrow) pairs on the frame that was passed
    # in, not on a module-level global
    data["ClusterMatrix"] = list(
        zip(data["Cluster"], data["ClusterTomorrow"])
    )

    today = data["Cluster"].to_numpy(dtype=int)
    tomorrow = data["ClusterTomorrow"].to_numpy(dtype=int)
    n_pairs = len(data)

    if k is None:
        k = int(max(today.max(), tomorrow.max())) + 1 if n_pairs else 0

    # Count each (today, tomorrow) transition; add.at accumulates
    # correctly when the same cell is hit more than once
    counts = np.zeros((k, k))
    np.add.at(counts, (today, tomorrow), 1.0)

    # Convert counts to percentages; with no pairs the matrix stays zero
    clust_mat = counts * 100.0 / n_pairs if n_pairs else counts

    print("Cluster Follow-on Matrix:")
    print(clust_mat)
    return clust_mat


if __name__ == "__main__":
    # Load the daily bars for index code 399300 from the local MySQL
    # database (same query as get_open_normalised_prices uses).
    # NOTE(review): host and credentials are hard-coded; consider
    # moving them to configuration before sharing this script.
    connect = pymysql.connect(
        host='127.0.0.1',
        db='blog',
        user='root',
        passwd='123456',
        charset='utf8',
        use_unicode=True
    )
    select_sql_300 = "select date as Date,open as Open,high as High,low as Low,adj_close as Close from `tmp_stock` where code ='399300' and date >= '2004-6-01'  order by date asc"
    try:
        hs300 = pd.read_sql(select_sql_300, con=connect)
    finally:
        # Release the connection even when the query raises
        connect.close()

    # Plot the price "candles" for the whole period that was loaded
    plot_candlesticks(hs300)

    # Carry out K-Means clustering with four clusters on the
    # three-dimensional data H/O, L/O and C/O.
    # NOTE: `k`, `labels` and `hs300` are module-level names; check the
    # functions above before renaming them.
    hs300_norm = get_open_normalised_prices()
    k = 4
    km = KMeans(n_clusters=k, random_state=42)
    km.fit(hs300_norm)
    labels = km.labels_

    # Attach the label of every bar to both frames. This relies on the
    # two queries returning the same rows in the same order.
    hs300_norm["Cluster"] = labels
    hs300["Cluster"] = labels

    # Plot the 3D normalised candles using H/O, L/O, C/O
    plot_3d_normalised_candles(hs300_norm)

    # Create and output the cluster follow-on matrix
    create_follow_cluster_matrix(hs300)

    # Closing price over time, coloured by cluster
    plot_cluster(hs300)

在這裡插入圖片描述

sklearn kMeans 分類實戰，對滬深300的每日漲跌進行分類

# ohlc_clustering.py import copy import datetime import pymysql import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import

使用nagios監控交換機端口流量，對低於閾值的流量進行報警

交換機 nagios snmp 需求：使用nagios服務需要對一臺思科交換機的24端口進行流量監控，當流量低於2MB/s時，發送報警；當流量高於3MB/s時，報警取消；當流量介於2MB/s-3MB/s時，處於警告warning狀態。操作方法：第一：編寫腳本文件：vim /usr/lib64/na

keras對貓、狗資料集進行分類（三）

使用已訓練模型對貓狗圖片進行測試，以及視覺化模型訓練過程。示例程式碼： # # 視覺化卷積神經網路 # # 人們常說，深度學習模型是“黑盒子”，學習表示難以提取並以人類可讀的形式呈現。 # 雖然對於某些型別的深度學習模型來說這是部分正確的，但對於卷積神經網路（convnets）來說絕對不是這樣。 # 由con

學生類，含學生姓名與成績，用友元函式訪問私有成員，對兩個物件的成績進行比較。（2018.9.19 c++作業）

定義兩個物件，與一個友元函式（使用c++中引用傳遞的方式，實現引數的傳遞） #include using namespace std; class student { private: char name[20]; float grade; public: s

滬深個股每日成交明細下載(3秒)

2018年10月份，滬深個股每日成交明細連結分享，需要的可以在此下載：提取碼：vsb1 格式：txt 可提供資料日期：2018-09-28起資料重新整理頻率：3秒（3秒內

P51.2編寫一個程式，對使用者錄入的產品資訊進行格式化。

編寫一個程式，對使用者錄入的產品資訊進行格式化。 #include<stdio.h> int main(void) { int mm, dd, yyyy, a; double b; printf("Enter item num

文字分類實戰（十）—— BERT 預訓練模型文字分類實戰（一）—— word2vec預訓練詞向量

1 大綱概述　　文字分類這個系列將會有十篇左右，包括基於word2vec預訓練的文字分類，與及基於最新的預訓練模型（ELMo，BERT等）的文字分類。總共有以下系列：　　word2vec預訓練詞向量　　textCNN 模型　　charCNN 模型　　Bi-LSTM 模型　　Bi-LST

利用決策樹對微信公眾號文字進行分類

問題背景：公眾平臺的公眾號每天都會發布大量文章，通過群發圖文的形式向用戶每天推送內容。由於公眾號面向的使用者群體、專注的領域不同，圖文內容也差異很大。一些公眾號主要釋出時事政治內容，而另外一些公眾號主要釋出遊戲的視訊與周邊等。識別公眾號的內容對公眾平臺的運營與新場景應用發揮重

巧妙解決mysql刪除或新增記錄時，對自增主鍵ID進行重新排序的問題

一、問題描述表一 id 姓名 1 張三 2 李四表二(當刪除第一條記錄時，顯示為表二) id 姓名 2 李四表三（如何使表顯示為表三，即實現對表中的id欄位重新排序） id 姓名 1 李四二、問題解決原理：重新定義一個變數用來顯示表中的序列號 echo "<

sklearn實戰：對文件進行聚類分析（KMeans演算法）

%matplotlib inline import matplotlib.pyplot as plt import numpy as np from time import time from sklearn.datasets import load_fi

python3 簡單實現從csv文件中讀取內容，並對內容進行分類統計

tmp spa writer ict 打開文件 while 類型 spl blog 新手python剛剛上路，在實際工作中遇到如題所示的問題，嘗試使用python3簡單實現如下，歡迎高手前來優化import csv #打開文件，用with打開可以不用去特意關閉file了

給定一系列正整數，請按要求對數字進行分類，

數字分類題目鏈接https://www.nowcoder.com/pat/6/problem/4078題目描述給定一系列正整數，請按要求對數字進行分類，並輸出以下5個數字： A1 = 能被5整除的數字中所有偶數的和； A2 = 將被5除後余1的數字按給出順序進行交錯求和，即計算n1-n2+n3-n4.

語義分割(semantic segmentation) 常用神經網絡介紹對比-FCN SegNet U-net DeconvNet，語義分割,簡單來說就是給定一張圖片,對圖片中的每一個像素點進行分類；目標檢測只有兩類,目標和非目標，就是在一張圖片中找到並用box標註出所有的目標.

avi projects div 般的 ict 中間接受 img dense from：https://blog.csdn.net/u012931582/article/details/70314859 2017年04月21日 14:54:10 閱讀數：4369

sklearn kMeans 分類實戰，對滬深300的每日漲跌進行分類

sklearn kMeans 分類實戰，對滬深300的每日漲跌進行分類

使用nagios監控交換機端口流量，對低於閾值的流量進行報警

keras對貓、狗資料集進行分類（三）

學生類，含學生姓名與成績，用友元函式訪問私有成員，對兩個物件的成績進行比較。（2018.9.19 c++作業）

滬深個股每日成交明細下載(3秒)

P51.2編寫一個程式，對使用者錄入的產品資訊進行格式化。

文字分類實戰（十）—— BERT 預訓練模型文字分類實戰（一）—— word2vec預訓練詞向量

利用決策樹對微信公眾號文字進行分類

巧妙解決mysql刪除或新增記錄時，對自增主鍵ID進行重新排序的問題

sklearn實戰：對文件進行聚類分析（KMeans演算法）

python3 簡單實現從csv文件中讀取內容，並對內容進行分類統計

給定一系列正整數，請按要求對數字進行分類，

[PyTorch小試牛刀]實戰二·實現邏輯迴歸對鳶尾花進行分類

sklearn KMeans 分類

TensorFlow深度學習實戰（一）：AlexNet對MNIST資料集進行分類

[PyTorch小試牛刀]實戰三·DNN實現邏輯迴歸對FashionMNIST資料集進行分類

怎樣用excel按進行分類求和，最後再根據一列對其他列進行排序

Scikit-Learn（sklearn）中的KNeighborsClassifier對鳶尾花進行分類

使用LogisticRegression和SGDClassifier對良/惡性腫瘤進行分類，並計算出準確率召回率和F1的值

sklearn kMeans 分類實戰，對滬深300的每日漲跌進行分類

相關推薦