k近鄰法的C++實現

阿新 • • 發佈：2019-01-12

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;

// A kd-tree node; every node is also the root of the subtree hanging below it.
// The links are plain raw pointers and the struct has no destructor, so the
// code that allocates nodes is responsible for releasing them.
struct KdTree{
    std::vector<double> root;   // point stored at this node; empty means "no point"
    KdTree* parent;
    KdTree* leftChild;
    KdTree* rightChild;
    // Default constructor: an empty node with no parent and no children.
    KdTree() : parent(nullptr), leftChild(nullptr), rightChild(nullptr) {}
    // True when this node stores no point.
    bool isEmpty() const
    {
        return root.empty();
    }
    // True when this node stores a point and has neither child.
    bool isLeaf() const
    {
        return (!root.empty()) &&
            rightChild == nullptr && leftChild == nullptr;
    }
    // True when this node stores a point and has no parent.
    bool isRoot() const
    {
        return (!isEmpty()) && parent == nullptr;
    }
    // True when this node is its parent's left child.
    // Decided by node identity rather than by comparing stored points, so
    // duplicate points in sibling nodes cannot confuse the answer; a node
    // without a parent is neither a left nor a right child.
    bool isLeft() const
    {
        return parent != nullptr && parent->leftChild == this;
    }
    // True when this node is its parent's right child (same rules as isLeft).
    bool isRight() const
    {
        return parent != nullptr && parent->rightChild == this;
    }
};

// Training set for the demo: six sample points with two coordinates each.
// NOTE(review): under `using namespace std;` an unqualified use of this global
// can clash with std::data when compiled as C++17 or later - confirm the
// target language standard.
int data[6][2] = {{2,3},{5,4},{9,6},{4,7},{8,1},{7,2}};

// Returns the transpose of Matrix, which is stored as a vector of rows.
// The column count is taken from the first row; every row is expected to have
// at least that many elements. An empty matrix yields an empty result.
// Cells are value-initialised before being overwritten, so T only needs to be
// default-constructible and copy-assignable (it need not be numeric).
template<typename T>
std::vector<std::vector<T> > Transpose(std::vector<std::vector<T> > Matrix)
{
    using Index = typename std::vector<T>::size_type;

    // Guard: Matrix[0] below would be undefined for an empty matrix.
    if (Matrix.empty())
        return std::vector<std::vector<T> >();

    const Index row = Matrix.size();
    const Index col = Matrix[0].size();
    std::vector<std::vector<T> > Trans(col, std::vector<T>(row));
    for (Index i = 0; i < col; ++i)
    {
        for (Index j = 0; j < row; ++j)
        {
            Trans[i][j] = Matrix[j][i];
        }
    }
    return Trans;
}

// Returns the element that would sit at index size()/2 if vec were sorted in
// ascending order (the upper median for an even number of elements).
// The argument is taken by value, so the caller's vector is left untouched.
// Throws std::invalid_argument when vec is empty.
template <typename T>
T findMiddleValue(std::vector<T> vec)
{
    if (vec.empty())
        throw std::invalid_argument("findMiddleValue: empty input");

    // Only the middle element is needed, so a selection is enough; a full
    // sort is not required.
    const auto middle = vec.begin() + vec.size() / 2;
    std::nth_element(vec.begin(), middle, vec.end());
    return *middle;
}

// Recursively builds a kd-tree in the node `tree` from the points in `data`.
//   tree  - node that becomes the root of the (sub)tree; nothing is done when null
//   data  - sample points; all rows are expected to have the size of the first row
//   depth - depth of `tree` in the whole tree; the split axis is depth % dimension
// With no samples the node is left as it is; with one sample the node becomes a
// leaf. Otherwise the first point whose split coordinate equals the middle
// value (see findMiddleValue) is stored in the node, points with a smaller
// coordinate go to the left subtree and all remaining points to the right one.
// Both child nodes are always allocated for such a node, so a child may end up
// as an empty placeholder node.
void buildKdTree(KdTree* tree, std::vector<std::vector<double> > data, unsigned depth)
{
    if (tree == nullptr)
        return;

    // Termination conditions.
    if (data.empty())
        return;
    if (data.size() == 1)
    {
        tree->root = data[0];
        return;
    }

    // Dimension of the samples; zero-dimensional points offer no axis to split
    // on (and depth % 0 would be undefined), so stop here.
    const std::vector<double>::size_type k = data[0].size();
    if (k == 0)
        return;

    // Choose the split axis and gather only that coordinate of every sample.
    const std::vector<double>::size_type splitAttribute = depth % k;
    std::vector<double> splitAttributeValues;
    splitAttributeValues.reserve(data.size());
    for (const std::vector<double>& sample : data)
        splitAttributeValues.push_back(sample[splitAttribute]);

    // Choose the split value.
    const double splitValue = findMiddleValue(splitAttributeValues);

    // Partition the samples around the split value. A local flag (rather than
    // the previous content of tree->root) decides which point becomes the node.
    std::vector<std::vector<double> > subset1;
    std::vector<std::vector<double> > subset2;
    bool rootChosen = false;
    for (const std::vector<double>& sample : data)
    {
        const double value = sample[splitAttribute];
        if (!rootChosen && value == splitValue)
        {
            tree->root = sample;
            rootChosen = true;
        }
        else if (value < splitValue)
            subset1.push_back(sample);
        else
            subset2.push_back(sample);
    }

    // Recurse into both halves, one level deeper.
    tree->leftChild = new KdTree;
    tree->leftChild->parent = tree;
    tree->rightChild = new KdTree;
    tree->rightChild->parent = tree;
    buildKdTree(tree->leftChild, subset1, depth + 1);
    buildKdTree(tree->rightChild, subset2, depth + 1);
}

// Prints the subtree rooted at `tree` to standard output.
// Each node is written as its coordinates, every one followed by a comma, on a
// line indented with `depth` tab characters. Children are printed one level
// deeper and are prefixed with " left:" or "right:".
void printKdTree(KdTree *tree, unsigned depth)
{
    std::cout << std::string(depth, '\t');
    for (double coordinate : tree->root)
        std::cout << coordinate << ",";
    std::cout << std::endl;

    KdTree* const lower = tree->leftChild;
    KdTree* const upper = tree->rightChild;

    // A node without any child ends the recursion.
    if (!lower && !upper)
        return;

    const std::string childIndent(depth + 1, '\t');

    if (lower)
    {
        std::cout << childIndent << " left:";
        printKdTree(lower, depth + 1);
    }
    std::cout << std::endl;

    if (upper)
    {
        std::cout << childIndent << "right:";
        printKdTree(upper, depth + 1);
    }
    std::cout << std::endl;
}

// Computes the distance between two points of equal dimension.
//   method 0 - Euclidean distance
//   method 1 - Manhattan distance
// Any other method writes a message to stderr and returns -1.
// If the two points differ in size, a message is written to stderr and the
// program terminates with exit status 1.
double measureDistance(std::vector<double> point1, std::vector<double> point2, unsigned method)
{
    const std::vector<double>::size_type dims = point1.size();
    if (dims != point2.size())
    {
        std::cerr << "Dimensions don't match！！" ;
        exit(1);
    }

    if (method == 0)
    {
        // Euclidean: square root of the summed squared differences.
        double sumOfSquares = 0;
        for (std::vector<double>::size_type idx = 0; idx < dims; ++idx)
            sumOfSquares += std::pow(point1[idx] - point2[idx], 2);
        return std::sqrt(sumOfSquares);
    }

    if (method == 1)
    {
        // Manhattan: sum of the absolute differences.
        double total = 0;
        for (std::vector<double>::size_type idx = 0; idx < dims; ++idx)
            total += std::abs(point1[idx] - point2[idx]);
        return total;
    }

    std::cerr << "Invalid method!!" << std::endl;
    return -1;
}
//在kd樹tree中搜索目標點goal的最近鄰
//輸入：目標點；已構造的kd樹
//輸出：目標點的最近鄰
vector<double> searchNearestNeighbor(vector<double> goal, KdTree *tree)
{
    /*第一步：在kd樹中找出包含目標點的葉子結點：從根結點出發，
    遞迴的向下訪問kd樹，若目標點的當前維的座標小於切分點的
    座標，則移動到左子結點，否則移動到右子結點，直到子結點為
    葉結點為止,以此葉子結點為“當前最近點”
    */
    unsigned k = tree->root.size();//計算出資料的維數
    unsigned d = 0;//維度初始化為0，即從第1維開始
    KdTree* currentTree = tree;
    vector<double> currentNearest = currentTree->root;
    while(!currentTree->isLeaf())
    {
        unsigned index = d % k;//計算當前維
        if (currentTree->rightChild->isEmpty() || goal[index] < currentNearest[index])
        {
            currentTree = currentTree->leftChild;
        }
        else
        {
            currentTree = currentTree->rightChild;
        }
        ++d;
    }
    currentNearest = currentTree->root;

    /*第二步：遞迴地向上回退，在每個結點進行如下操作：
    (a)如果該結點儲存的例項比當前最近點距離目標點更近，則以該例點為“當前最近點”
    (b)當前最近點一定存在於某結點一個子結點對應的區域，檢查該子結點的父結點的另
    一子結點對應區域是否有更近的點（即檢查另一子結點對應的區域是否與以目標點為球
    心、以目標點與“當前最近點”間的距離為半徑的球體相交）；如果相交，可能在另一
    個子結點對應的區域記憶體在距目標點更近的點，移動到另一個子結點，接著遞迴進行最
    近鄰搜尋；如果不相交，向上回退*/

    //當前最近鄰與目標點的距離
    double currentDistance = measureDistance(goal, currentNearest, 0);

    //如果當前子kd樹的根結點是其父結點的左孩子，則搜尋其父結點的右孩子結點所代表
    //的區域，反之亦反
    KdTree* searchDistrict;
    if (currentTree->isLeft())
    {
        if (currentTree->parent->rightChild == NULL)
            searchDistrict = currentTree;
        else
            searchDistrict = currentTree->parent->rightChild;
    }
    else
    {
        searchDistrict = currentTree->parent->leftChild;
    }

    //如果搜尋區域對應的子kd樹的根結點不是整個kd樹的根結點，繼續回退搜尋
    while (searchDistrict->parent != NULL)
    {
        //搜尋區域與目標點的最近距離
        double districtDistance = abs(goal[(d+1)%k] - searchDistrict->parent->root[(d+1)%k]);

        //如果“搜尋區域與目標點的最近距離”比“當前最近鄰與目標點的距離”短，表明搜尋
        //區域內可能存在距離目標點更近的點
        if (districtDistance < currentDistance )//&& !searchDistrict->isEmpty()
        {

            double parentDistance = measureDistance(goal, searchDistrict->parent->root, 0);

            if (parentDistance < currentDistance)
            {
                currentDistance = parentDistance;
                currentTree = searchDistrict->parent;
                currentNearest = currentTree->root;
            }
            if (!searchDistrict->isEmpty())
            {
                double rootDistance = measureDistance(goal, searchDistrict->root, 0);
                if (rootDistance < currentDistance)
                {
                    currentDistance = rootDistance;
                    currentTree = searchDistrict;
                    currentNearest = currentTree->root;
                }
            }
            if (searchDistrict->leftChild != NULL)
            {
                double leftDistance = measureDistance(goal, searchDistrict->leftChild->root, 0);
                if (leftDistance < currentDistance)
                {
                    currentDistance = leftDistance;
                    currentTree = searchDistrict;
                    currentNearest = currentTree->root;
                }
            }
            if (searchDistrict->rightChild != NULL)
            {
                double rightDistance = measureDistance(goal, searchDistrict->rightChild->root, 0);
                if (rightDistance < currentDistance)
                {
                    currentDistance = rightDistance;
                    currentTree = searchDistrict;
                    currentNearest = currentTree->root;
                }
            }
        }//end if

        if (searchDistrict->parent->parent != NULL)
        {
            searchDistrict = searchDistrict->parent->isLeft()?
                            searchDistrict->parent->parent->rightChild:
                            searchDistrict->parent->parent->leftChild;
        }
        else
        {
            searchDistrict = searchDistrict->parent;
        }
        ++d;
    }//end while
    return currentNearest;
}

int main()
{
    vector<vector<double> > train(6, vector<double>(2, 0));
    for (unsigned i = 0; i < 6; ++i)
        for (unsigned j = 0; j < 2; ++j)
            train[i][j] = data[i][j];

    KdTree* kdTree = new KdTree;
    buildKdTree(kdTree, train, 0);

    printKdTree(kdTree, 0);

    vector<double> goal;
    goal.push_back(3);
    goal.push_back(4.5);
    vector<double> nearestNeighbor = searchNearestNeighbor(goal, kdTree);
    vector<double>::iterator beg = nearestNeighbor.begin();
    cout << "The nearest neighbor is: ";
    while(beg != nearestNeighbor.end()) cout << *beg++ << ",";
    cout << endl;
    return 0;
}

k近鄰法的C++實現

統計學習方法筆記（一）：K近鄰法的實現：kd樹

機器學習基礎（四十三）—— kd 樹（ k 近鄰法的實現）

k近鄰法的C++實現

統計學習三：2.K近鄰法代碼實現（以最近鄰法為例）

機器學習系列：k 近鄰法（k-NN）的原理及實現

統計學習方法ｃ++實現之二　k近鄰法

k近鄰法：R實現(一)

K近鄰法之kd樹及其Python實現

AVLTree的實現算法(C++實現)

K近鄰法

基於私鑰加密公鑰解密的RSA算法C#實現方法

[劍指offer] 最小的K個數，C++實現

《統計學習方法》筆記三 k近鄰法

機器學習實戰——k-近鄰演算法Python實現問題記錄

第三章k近鄰法（接上篇）

【統計學習方法-李航-筆記總結】三、k近鄰法

資料結構與演算法之列舉（窮舉）法 C++實現

列主元Gauss消去法(C++實現)

K近鄰法(KNN)原理小結

K近鄰法-k-nearest neighbor,KNN

k近鄰法的C++實現

相關推薦