[OpenCV] 基於聚類的視訊關鍵幀提取

阿新 • • 發佈：2018-12-25

參考論文：

1.《用非監督式聚類進行視訊鏡頭分割》

2.《一種基於視訊聚類的關鍵幀提取方法》

右邊為提取出來的關鍵幀

聚類的基本思想是，先把視訊聚成n個類，這n個類內的視訊幀是相似的，而類與類之間的視訊幀是不相似的。第二步是從每個類內提取一個代表作為關鍵幀。另外，如果一個類的幀數太少，那麼這個類不具有代表性，可以直接與相鄰的類合併。

因為HSV空間相比起RGB空間對顏色特性有著更好的支援，所以第一步我們先把顏色對映到HSV空間上。

首先，把分佈在0~255的RGB顏色直接對映到0~255的HSV顏色。接下來，我們對HSV顏色再進行一次分割，即把H分量等分為12塊，S、V分量各等分為5塊，再把原來範圍在0~255的顏色對映到12×5×5的範圍上。

完成對映後，接下來需要構建HSV的顏色直方圖。我們設影象的大小為M×N，然後分別統計H、S、V分量中，值為i的畫素佔全部畫素的百分比，即 $p(i) = n_i / (M \times N)$，其中 $n_i$ 為該分量取值為i的畫素個數。

計算兩張影象的相似度，需要我們先分別計算三個顏色直方圖的相似性，具體的計算方法是累加兩張影象直方圖相同索引處對應的最小值，即 $S(A, B) = \sum_i \min(A(i), B(i))$。

又因為人眼對H分量的敏感程度大於對S分量，而對S分量的敏感程度又大於對V分量，所以最終我們設H的權重為0.5，S為0.3，V為0.2，即 $Sim = 0.5\,S_H + 0.3\,S_S + 0.2\,S_V$。

接下來，開始具體的計算。

1.對於每個類，維護一個質心，即該類內所有幀的直方圖的平均值。

2.對於每一幀，計算它與各聚類質心的相似度（根據前面提到的相似度公式）。

如果相似度小於某一閾值，那麼把它歸到一個新建的類中，否則加入之前的類中。

3.合併一部分過小的聚類。

4.計算每個聚類中熵最大的影象，將其作為關鍵幀。計算方法：對H、S、V三個直方圖分別計算熵 $E = -\sum_i p(i) \log_2 p(i)$，再按0.5、0.3、0.2的權重加權求和。

#include <io.h>

#include <array>
#include <cmath>
#include <cstdio>
#include <list>
#include <string>

#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
// NOTE(review): NUM_FRAME and SIZE are not referenced anywhere in the visible
// code; their intended meaning cannot be determined from this file.
#define NUM_FRAME 300
#define SIZE 7

char path[100];// input file path (not referenced anywhere in the visible code)

// One cluster ("shot") of frames grouped by histogram similarity.
struct shot
{
	// 22-bin descriptors of the frames that belong to this cluster.
	std::list<std::array<float, 22> > content;
	// Frame ids of the members; appended together with `content`.
	std::list<int> id;
	// Representative descriptor that new frames are compared against.
	std::array<float, 22> center;
};


// Weighted histogram-intersection similarity of two 22-bin descriptors.
//
// The descriptor is treated as three consecutive histograms: bins [0,12),
// bins [12,17) and bins [17,22). For each range the element-wise minima of
// x1 and x2 are summed, and the three sums are combined with the weights
// 0.5, 0.3 and 0.2 respectively.
float similarity(std::array<float, 22>  x1, std::array<float, 22>  x2)
{
	// Sum of min(x1[b], x2[b]) over the half-open bin range [first, last).
	const auto overlap = [&x1, &x2](int first, int last) {
		float acc = 0;
		for (int bin = first; bin < last; ++bin) {
			acc += std::min(x1[bin], x2[bin]);
		}
		return acc;
	};

	const float firstOverlap = overlap(0, 12);
	const float secondOverlap = overlap(12, 17);
	const float thirdOverlap = overlap(17, 22);

	return firstOverlap * 0.5f + secondOverlap * 0.3f + thirdOverlap * 0.2f;
}

// Returns the id of the descriptor with the largest weighted entropy.
//
// x : 22-bin descriptors; bins [0,12), [12,17) and [17,22) are scored as
//     three separate histograms.
// y : ids paired element-by-element with x.
//
// For each descriptor the base-2 entropy of each of the three ranges is
// computed (zero or negative bins contribute nothing) and combined with the
// weights 0.5, 0.3 and 0.2. The id paired with the highest score is
// returned; on ties the earliest entry wins. If x and y differ in length
// only the common prefix is examined. Returns 0 when there is nothing to
// examine.
//
// Fix: the running maximum used to be reset to 0 for every descriptor, so
// the function returned the LAST descriptor with non-zero entropy instead of
// the one with the largest entropy.
int findMaxEntropyId(const std::list<std::array<float, 22> >& x, const std::list<int>& y)
{
	// Base-2 entropy of bins [first, last) of one descriptor.
	const auto entropy = [](const std::array<float, 22>& hist, int first, int last) {
		float e = 0.0f;
		for (int j = first; j < last; ++j) {
			const float p = hist[j];
			if (p > 0.0f) {
				e -= p * std::log2(p);
			}
		}
		return e;
	};

	int bestId = 0;
	// Entropy is never negative, so -1 guarantees that the first descriptor
	// is accepted even when every score is exactly 0.
	float bestScore = -1.0f;

	std::list<int>::const_iterator idIt = y.begin();
	for (std::list<std::array<float, 22> >::const_iterator it = x.begin();
		it != x.end() && idIt != y.end(); ++it, ++idIt) {
		const float score = 0.5f * entropy(*it, 0, 12)
			+ 0.3f * entropy(*it, 12, 17)
			+ 0.2f * entropy(*it, 17, 22);
		if (score > bestScore) {
			bestScore = score;
			bestId = *idIt;
		}
	}
	return bestId;
}

// Element-wise sum of two 22-bin descriptors.
const std::array<float, 22> operator +(const std::array<float, 22>  &x, const std::array<float, 22>  &y)
{
	// Start from a copy of the left operand and accumulate the right one.
	std::array<float, 22> total = x;
	for (std::size_t bin = 0; bin < total.size(); ++bin) {
		total[bin] += y[bin];
	}
	return total;
}

// Divides every bin of a 22-bin descriptor by the integer s.
const std::array<float, 22> operator /(const std::array<float, 22>  &x, int s)
{
	// The divisor is converted to float once; each bin is then divided by it.
	const float divisor = static_cast<float>(s);
	std::array<float, 22> quotient;
	for (std::size_t bin = 0; bin < quotient.size(); ++bin) {
		quotient[bin] = x[bin] / divisor;
	}
	return quotient;
}

void combine(vector<shot>& Shot, int i, int j)
{
	list<array<float, 22> >::iterator it;
	list<int>::iterator k = Shot[j].id.begin();
	vector<shot>::iterator v = Shot.begin() + j;
	for (it = Shot[j].content.begin(); it != Shot[j].content.end(); it++,k++) {
		Shot[i].content.push_back(*it);
		Shot[i].center = *it + Shot[i].center;
		Shot[i].id.push_back(*k);
	}
	Shot.erase(v);
}

// Bin-wise total of every descriptor in arr; all zeros for an empty list.
std::array<float, 22> sum(std::list<array<float, 22> >& arr)
{
	std::array<float, 22> total = { 0 };
	for (const std::array<float, 22>& hist : arr) {
		for (std::size_t bin = 0; bin < total.size(); ++bin) {
			total[bin] += hist[bin];
		}
	}
	return total;
}

//將圖片序列轉換為視訊
void handleVideo()
{
	int i = 0;
	IplImage* img = 0;//讀入影象
	IplImage* outimg = 0;//修改影象尺寸
	char image_name[100];//影象名字
	char videoname[100];
	strcpy(videoname, "1.avi");

	//從檔案讀入視訊
	CvCapture* capture = cvCaptureFromAVI(videoname);
	//讀取和顯示
	IplImage* frameimg;//從視訊中提取的幀影象
	int fps = (int)cvGetCaptureProperty(capture, CV_CAP_PROP_FPS);//視訊的fps
	int frameH = (int)cvGetCaptureProperty(capture, CV_CAP_PROP_FRAME_HEIGHT);//視訊的高度
	int frameW = (int)cvGetCaptureProperty(capture, CV_CAP_PROP_FRAME_WIDTH);//視訊的寬度
	printf("\tvideo height : %d\n\tvideo width : %d\n\tfps : %d\n", frameH, frameW, fps);

	list<array<float, 22> >colorbar;

	//建立視窗  
	cvNamedWindow("mainWin", CV_WINDOW_AUTOSIZE);
	//讀入圖片，並製作幻燈片
	while (1)
	{
		frameimg = cvQueryFrame(capture); //獲取一幀圖片

		if (!frameimg)break;//讀到盡頭，退出
		cvCvtColor(frameimg, frameimg, CV_BGR2HSV);
		array<float, 22> color = { 0 };
		uchar* data = (uchar *)frameimg->imageData;
		int step = frameimg->widthStep / sizeof(uchar);
		int channels = frameimg->nChannels;
		uchar* h = new uchar[frameimg->height*frameimg->width];
		uchar* s = new uchar[frameimg->height*frameimg->width];
		uchar* v = new uchar[frameimg->height*frameimg->width];
		for (int i = 0; i < frameimg->height; i++) {
			for (int j = 0; j < frameimg->width; j++) {
				h[i*frameimg->height + j] = data[i*step + j*channels + 0] / 21;
				if (h[i*frameimg->height + j] > 11)h[i*frameimg->height + j] = 11;
				s[i*frameimg->height + j] = data[i*step + j*channels + 1] / 51;
				if (s[i*frameimg->height + j] > 4)s[i*frameimg->height + j] = 4;
				v[i*frameimg->height + j] = data[i*step + j*channels + 2] / 51;
				if (v[i*frameimg->height + j] > 4)v[i*frameimg->height + j] = 4;

				color[h[i*frameimg->height + j]]++;
				color[12 + s[i*frameimg->height + j]]++;
				color[17 + v[i*frameimg->height + j]]++;
			}
		}
		for (int i = 0; i < 22; i++) {
			color[i] /= frameimg->height*frameimg->width;

		}
		colorbar.push_back(color);
	}
	float threshold = 0.8f;
	list<array<float, 22> >::iterator it = colorbar.begin();
	it++;
	vector<shot>Shot;

	//放入第一幀
	shot first;
	first.content.push_back(*colorbar.begin());
	first.center = *colorbar.begin();
	first.id.push_back(0);
	Shot.push_back(first);

	int count = 0;
	int num = 1;
	int index = 0;
	float max = 0;
	for (; it != colorbar.end(); it++) {
		max = 0;
		index = 0;
		//計算相似度最大的
		for (int i = 0; i < num; i++) {
			float ratio = similarity(*it, Shot[i].center);
			if (ratio > max) {
				max = ratio;
				index = i;
			}
		}
		//如果最大的小於某個閾值，則新建一個聚類
		if (max < threshold) {
			num++;
			shot newshot;
			newshot.center = *it;
			newshot.content.push_back(*it);
			newshot.id.push_back(count);
			Shot.push_back(newshot);
		}
		else {
			Shot[index].center = (*it + sum(Shot[index].content)) / (Shot[index].content.size() + 1);
			Shot[index].content.push_back(*it);
			Shot[index].id.push_back(count);
		}
		count++;
	}
	for (int i = 0; i < Shot.size(); i++) {
		if (Shot[i].content.size() <10 && i>0) {
			combine(Shot, i - 1, i);
			i--;
		}
	}
	float maxE = 0.0f;
	int indexE = 0;
	for (int i = 0; i < Shot.size(); i++) {
		int id = findMaxEntropyId(Shot[i].content, Shot[i].id);
		printf("%d\n", id);
	}

	printf("%d", Shot.size());
	cvDestroyWindow("mainWin");
}


// Program entry point: runs the key-frame extraction once, then waits
// before exiting so the console output stays visible.
int main(int argc, char* argv[])
{
	// The command-line arguments are not used.
	(void)argc;
	(void)argv;

	handleVideo();

	// 0 is waitKey's default delay, written out explicitly.
	waitKey(0);
	// Runs the shell's "pause" command (presumably Windows-only; verify on
	// other platforms).
	system("pause");

	return 0;
}

opencv 基於內容的視訊關鍵幀提取（以HSV總量為特徵量）

#include "stdafx.h" #include #include "opencv2/core/core.hpp" #include #include using namespace std; using namespace cv; void RGBtoHSV(float b

[OpenCV] 基於聚類的視訊關鍵幀提取

參考論文： 1.《用非監督式聚類進行視訊鏡頭分割》 2.《一種基於視訊聚類的關鍵幀提取方法》右邊為提取出來的關鍵幀聚類的基本思想是，先把視訊聚成n個類，這n個類內的視訊幀是相似的，而類與類之間的

關於視訊關鍵幀提取演算法

系統總體設計監控系統的移動端部署影象關鍵內容的提取資料傳輸加密監控端與使用者身份認證身份認證實時預警實時預警獲取監控關鍵資訊獲取監控關鍵資訊異常行為檢測過程通常分為四個階段：初始化-通過初始化為整個系統處

基於聚類（Kmeans）演算法實現客戶價值分析系統(電信運營商)

開發環境 jupyter notebook 一、電信運營商–客戶價值分析從客戶需求出發，瞭解客戶需要什麼，他們有怎麼樣的特徵，電信運營商為客戶設定不同的優惠套餐爭取更多的使用者：推出不同的優

denseflow視訊截幀提取光流工具

本篇部落格介紹如何安裝denseflow工具，由https://github.com/yjxiong/dense_flow提供。這個工具可以對視訊進行截幀，並計算光流儲存到本地，使用了GPU編譯的OpenCV，所以提取速度很快，後續可以用於行為動作識別中，例如two-stream網

CVPR2016之A Key Volume Mining Deep Framework for Action Recognition論文閱讀（視訊關鍵幀選取）

該論文的主要思想是從視訊中選取關鍵的幀卷（frame volume）用來行為識別。該文章的意圖是通過對視訊中關鍵幀進行選取，減少與視訊表達內容不相關的視訊幀，實現視訊中行為識別準確率的提升。該文章主要從兩個方面進行闡述：1、如何選取關鍵幀。2、如何檢

python 視訊影象幀提取

import cv2 vidcap = cv2.VideoCapture('005.avi') success,image = vidcap.read() count = 0 success = True while success: success,image = vidcap.read()

Java實現視訊網站的視訊上傳、視訊轉碼、視訊關鍵幀抽圖, 及視訊播放功能視訊網站中提供的線上視訊播放功能,播放的都是FLV格式的檔案,它是Flash動畫檔案,可通過Flash製作的播放器來播

package com.webapp.service; import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.util.List; import javax.servlet.Serv

0007-用OpenCV的VideoCapture類讀取avi視訊檔案,並以幀流的形式顯示出來!

OpenCV用VideoCapture類實現avi視訊讀取的相關操作，具體怎麼使用，大家看程式碼便知！示例程式碼如下：程式碼中用的視訊下載連結：http://pan.baidu.com/s/1qYbRtqW 密碼：5bcu //opencv版本:OpenCV3.0 //VS版本:VS20

ffmpeg 提取視訊檔案關鍵幀

http://blog.csdn.net/fengfeifengfei/article/details/43410205 #include "libavformat/avformat.h" #include "libavcodec/avcodec.h"#include "l

python opencv提取關鍵幀

import cv2 cap = cv2.VideoCapture('/home/lw/3661.mp4') fps = cap.get(cv2.CAP_PROP_FPS) # 獲取幀速 print fps fWidth = cap.get(cv2.CAP_PROP_F

基於spark實現kmeans的更新聚類中心的關鍵程式碼

/** * 這個方法作用是返回被給樣本的所屬聚類中心的索引，及其到這個聚類中心的距離， * 其中class VectorWithNorm(val vector: Vector, val norm: Double)這是VectorWithNorm

【機器學習】DBSCAN Algorithms基於密度的聚類算法

多次使用缺點有效結束基於需要 att 共享一、算法思想： DBSCAN(Density-Based Spatial Clustering of Applications with Noise)是一個比較有代表性的基於密度的聚

基於K-means Clustering聚類算法對電商商戶進行級別劃分(含Octave仿真)

fprintf highlight 初始 load ogre max init 金額定時在從事電商做頻道運營時，每到關鍵時間節點，大促前，季度末等等，我們要做的一件事情就是品牌池打分，更新所有店鋪的等級。例如，所以的商戶分入SKA,KA,普通店鋪,新店鋪這4個級別，對於

聚類：層次聚類、基於劃分的聚類（k-means）、基於密度的聚類、基於模型的聚類

oca 基本思想初始化 methods 根據範圍下使用對象適用於一、層次聚類 1、層次聚類的原理及分類 1）層次法（Hierarchicalmethods）先計算樣本之間的距離。每次將距離最近的點合並到同一個類。然後，再計算類與類之間的距離，將距離最近的類合

【R統計】基於相似系數的聚類分析

ant 建模 sub plc str 選擇 pre light 均值題目：對48名應聘者數據的自變量作聚類分析，選擇變量的相關系數作為變量間的相似系數（cij），距離定義為dij=1-cij。分別用最長距離法、均值法、重心法和Ward法作聚類分析，並畫出相應的譜系圖。

基於w2v詞向量聚類出現的問題（有待解決）

cut 主題分詞問題 tmp kmean cab rain 如果 1.訓練詞向量代碼如下：#訓練詞語為向量表示def w2v_train(self): ques = self.cu.execute(‘select question from activity‘)#

Qt實戰--基於FFmpeg的視訊引擎實現類

FFmpeg 搞音視訊開發的基本都會接觸到FFmpeg這個庫，支援幾乎所有的音視訊編解碼格式。相對於上節我們用OpenCV實現的獲取視訊幀，FFmpeg對底層的控制粒度更細，有利於我們後續開發，精準控制編解碼格式，獲取碼流資訊，實現進度調整等； FFmpeg編譯或下載教程網上很多

機器學習筆記（參考吳恩達機器學習視訊筆記）12_聚類

12 聚類監督學習中，訓練集帶有標籤，目標是找到能夠區分正負樣本的決策邊界，需要根據標籤擬合一個假設函式。非監督學習中，需要將無標籤的訓練資料輸入到一個演算法，此演算法可以找到這些資料的內在結構。一個能夠根據資料的內在結構，將它們分成幾個不同的點集（或簇）的演算法，就被稱為聚類演算法。聚類

0008-用OpenCV的VideoWriter類進行視訊寫/輸出操作！

OpenCV提供了VideoWriter類進行視訊寫操作，這個類詳細的說明大家可以去OpenCV官方網站去查詢，官方網站對這個類的說明的地址為： http://docs.opencv.org/3.0-beta/ ... eo.html#videowriter 這裡不贅述了！直接上示例程式碼，大

[OpenCV] 基於聚類的視訊關鍵幀提取

相關推薦