OpenCV Image Processing and Face Recognition
OpenCV Basics
OpenCV is an open-source computer vision library that provides many common image-processing tools.
Note: all of the image data used in this article is in my GitHub repository.
Reading and displaying an image
```python
import numpy as np
import cv2 as cv

original = cv.imread('../machine_learning_date/forest.jpg')
cv.imshow('Original', original)
```
Displaying a single color channel of the image
```python
blue = np.zeros_like(original)
blue[:, :, 0] = original[:, :, 0]    # 0 - blue channel
cv.imshow('Blue', blue)
green = np.zeros_like(original)
green[:, :, 1] = original[:, :, 1]   # 1 - green channel
cv.imshow('Green', green)
red = np.zeros_like(original)
red[:, :, 2] = original[:, :, 2]     # 2 - red channel
cv.imshow('Red', red)
```
Image cropping
```python
h, w = original.shape[:2]               # (397, 600)
l, t = int(w / 4), int(h / 4)           # top-left corner
r, b = int(w * 3 / 4), int(h * 3 / 4)   # bottom-right corner
cropped = original[t:b, l:r]
cv.imshow('Cropped', cropped)
```
Image scaling
```python
cv2.resize(src, dsize, dst=None, fx=None, fy=None, interpolation=None)
```
Parameters:
- src: source image
- dsize: output image size
- fx: scale factor along the horizontal axis
- fy: scale factor along the vertical axis
- interpolation: interpolation method
```python
# output image size = input image size / 4
scaled1 = cv.resize(original, (int(w / 4), int(h / 4)),
                    interpolation=cv.INTER_LINEAR)
cv.imshow('Scaled1', scaled1)

# no explicit output size; scale factors along the x and y axes instead
scaled2 = cv.resize(scaled1, None, fx=4, fy=4,
                    interpolation=cv.INTER_LINEAR)
cv.imshow('Scaled2', scaled2)
cv.waitKey()  # block until the user presses a key, e.g. Esc
```
Saving an image to a file
```python
cv.imwrite('../machine_learning_date/blue.jpg', blue)
```
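cv.imwrite reports most failures, such as a directory that does not exist, through its boolean return value rather than an exception, so the result is worth checking. A minimal guard, assuming the same output path as above:
```python
# imwrite returns False on failure instead of raising
ok = cv.imwrite('../machine_learning_date/blue.jpg', blue)
if not ok:
    raise IOError('failed to write blue.jpg')
```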
Edge detection
Edge detection is a common technique in object recognition. The usual approach is based on the brightness gradient: pixels where the brightness gradient changes most sharply are identified as the object's edges.
```python
import cv2 as cv

# read the image as grayscale and display it
original = cv.imread('../machine_learning_date/chair.jpg', cv.IMREAD_GRAYSCALE)
cv.imshow('Original', original)
```
Sobel edge detection
```python
cv.Sobel(original, cv.CV_64F, 1, 0, ksize=5)
```
Parameters:
- src: source image
- ddepth: cv.CV_64F, do the convolution in 64-bit floating point (preserves the precision of the derivative)
- dx: 1 means take the Sobel partial derivative in the horizontal direction
- dy: 0 means do not take the Sobel partial derivative in the vertical direction
- ksize: use a 5×5 convolution kernel
Sobel partial derivative in the horizontal direction
```python
hsobel = cv.Sobel(original, cv.CV_64F, 1, 0, ksize=5)
cv.imshow('H-Sobel', hsobel)
```
Sobel partial derivative in the vertical direction
```python
vsobel = cv.Sobel(original, cv.CV_64F, 0, 1, ksize=5)
cv.imshow('V-Sobel', vsobel)
```
Sobel partial derivatives in both directions
```python
sobel = cv.Sobel(original, cv.CV_64F, 1, 1, ksize=5)
cv.imshow('Sobel', sobel)
```
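Passing dx=1, dy=1 in a single call asks Sobel for a mixed partial derivative, which tends to look faint. A more common way to combine both directions is to take the gradient magnitude of the two separate derivatives; a small sketch reusing the hsobel and vsobel results from above:
```python
# combine the two first-order derivatives into a gradient magnitude
magnitude = cv.magnitude(hsobel, vsobel)
# scale back to 8-bit so the result displays correctly
cv.imshow('Sobel-Magnitude', cv.convertScaleAbs(magnitude))
```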
Laplacian edge detection
```python
laplacian = cv.Laplacian(original, cv.CV_64F)
cv.imshow('Laplacian', laplacian)
```
Canny edge detection
```python
cv.Canny(original, 50, 240)
```
Parameters:
- image: input image
- threshold1: lower hysteresis threshold (50 here)
- threshold2: upper hysteresis threshold (240 here)
```python
canny = cv.Canny(original, 50, 80)
cv.imshow('Canny', canny)
cv.waitKey()
```
Brightness enhancement
OpenCV provides histogram equalization as a way to raise image brightness, which helps both edge detection and the training of object-recognition models.
Converting a color image to grayscale
```python
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
```
Histogram equalization
```python
equalized_gray = cv.equalizeHist(gray)
```
Example:
Read the image
```python
import cv2 as cv

original = cv.imread('../machine_learning_date/sunrise.jpg')  # read the image
cv.imshow('Original', original)  # display it
```
Convert the color image to grayscale
```python
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)
```
Histogram equalization on the grayscale image
```python
equalized_gray = cv.equalizeHist(gray)
cv.imshow('Equalized Gray', equalized_gray)
```
YUV: the luminance channel (Y) plus two chrominance channels (U, V)
```python
yuv = cv.cvtColor(original, cv.COLOR_BGR2YUV)
yuv[..., 0] = cv.equalizeHist(yuv[..., 0])  # equalize the luminance channel
yuv[..., 1] = cv.equalizeHist(yuv[..., 1])  # equalize the U chrominance channel
yuv[..., 2] = cv.equalizeHist(yuv[..., 2])  # equalize the V chrominance channel
equalized_color = cv.cvtColor(yuv, cv.COLOR_YUV2BGR)
cv.imshow('Equalized Color', equalized_color)
cv.waitKey()
```
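Equalizing the U and V channels as above also redistributes the colors, which can shift hues. A variant worth comparing, and arguably the more common practice, equalizes only the luminance channel so brightness improves while colors stay intact:
```python
yuv = cv.cvtColor(original, cv.COLOR_BGR2YUV)
yuv[..., 0] = cv.equalizeHist(yuv[..., 0])  # equalize luminance only, leave chrominance untouched
equalized_y_only = cv.cvtColor(yuv, cv.COLOR_YUV2BGR)
cv.imshow('Equalized Y Only', equalized_y_only)
```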
Corner detection
Corners are the points where straight edges meet: pixels where the direction of the color gradient changes.
Harris corner detector
```python
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
corners = cv.cornerHarris(gray, 7, 5, 0.04)
```
- src: input single-channel 8-bit or floating-point image
- blockSize: size of the neighborhood considered for corner detection
- ksize: aperture size of the Sobel derivative used internally
- k: free parameter of the Harris detector, usually chosen in [0.04, 0.06]
Example:
```python
import cv2 as cv

original = cv.imread('../machine_learning_date/box.png')
cv.imshow('Original', original)
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)  # grayscale cuts the computation
cv.imshow('Gray', gray)
corners = cv.cornerHarris(gray, 7, 5, 0.04)  # Harris corner detector
# blend the detections into a copy of the original
mixture = original.copy()
mixture[corners > corners.max() * 0.01] = [0, 0, 255]  # BGR [0, 0, 255] marks corners in red
cv.imshow('Corner', mixture)
cv.waitKey()
```
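The raw Harris response marks corners as near-isolated pixels, which can be hard to see. An optional tweak is to dilate the response map before thresholding; a small sketch reusing corners and original from the example above:
```python
# dilate the response map so each detected corner covers a small patch
dilated = cv.dilate(corners, None)
marked = original.copy()
marked[dilated > dilated.max() * 0.01] = [0, 0, 255]
cv.imshow('Corner-Dilated', marked)
```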
Image recognition
Keypoint detection
Commonly used keypoint detectors include the STAR detector and the SIFT detector.
Keypoint detection combines edge detection with corner detection to pick out an image's characteristic points.
The STAR keypoint detection API:
```python
star = cv.xfeatures2d.StarDetector_create()  # create a STAR keypoint detector
keypoints = star.detect(gray)                # detect all keypoints in the gray image
```
Draw all of the keypoints onto the mixture image:
```python
cv.drawKeypoints(original, keypoints, mixture,
                 flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
```
Parameters:
- image: source image
- keypoints: keypoints detected in the source image
- outImage: output image
- flags: flags controlling how the keypoints are drawn
Example:
```python
import cv2 as cv

original = cv.imread('../machine_learning_date/table.jpg')
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)  # grayscale cuts the computation
cv.imshow('Gray', gray)
star = cv.xfeatures2d.StarDetector_create()  # create a STAR keypoint detector
keypoints = star.detect(gray)                # detect all keypoints in the gray image
mixture = original.copy()
# drawKeypoints paints all the keypoints onto the mixture image
cv.drawKeypoints(original, keypoints, mixture,
                 flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv.imshow('Mixture', mixture)
cv.waitKey()
```
The SIFT keypoint detection API:
```python
sift = cv.xfeatures2d.SIFT_create()  # create a SIFT keypoint detector
keypoints = sift.detect(gray)        # detect all keypoints in the gray image
```
Example:
```python
import cv2 as cv

original = cv.imread('../machine_learning_date/table.jpg')
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)
sift = cv.xfeatures2d.SIFT_create()  # create a SIFT keypoint detector
keypoints = sift.detect(gray)        # detect all keypoints in the gray image
mixture = original.copy()
# paint all the keypoints onto the mixture image
cv.drawKeypoints(original, keypoints, mixture,
                 flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv.imshow('Mixture', mixture)
cv.waitKey()
```
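A portability note: cv.xfeatures2d ships with the opencv-contrib-python package. Since OpenCV 4.4, after the SIFT patent expired, SIFT is also available from the main module, so if the contrib import fails this equivalent call should work:
```python
sift = cv.SIFT_create()  # OpenCV >= 4.4; no contrib package needed for SIFT
```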
Feature descriptor matrix
An image's descriptor matrix records its keypoints together with the gradient information around each keypoint; similar images have similar descriptor matrices. Given enough samples, image content can therefore be recognized with a hidden Markov model.
Descriptor matrix API:
```python
sift = cv.xfeatures2d.SIFT_create()
keypoints = sift.detect(gray)
_, desc = sift.compute(gray, keypoints)
```
Example:
```python
import cv2 as cv
import matplotlib.pyplot as plt

original = cv.imread('../machine_learning_date/table.jpg')
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)
sift = cv.xfeatures2d.SIFT_create()  # create a SIFT keypoint detector
keypoints = sift.detect(gray)        # detect all keypoints in the gray image
_, desc = sift.compute(gray, keypoints)
print(desc.shape)  # (454, 128): one 128-dimensional descriptor per keypoint
plt.matshow(desc.T, cmap='jet', fignum='Description')
plt.title('Description')
plt.xlabel('Feature')
plt.ylabel('Sample')
plt.tick_params(which='both', top=False, labeltop=False,
                labelbottom=True, labelsize=10)
plt.show()
```
Object recognition
1. Read the training image samples from the training folder; each image yields one desc matrix, and each desc belongs to a category (e.g. car).
2. Merge all the desc matrices whose category is car into one training set:
| desc | label |
| ---- | ----- |
| desc | car |
| desc | car |
| desc | car |
| ... | ... |
From a training set like the one above, an HMM that matches car can be trained.
3. Train three HMMs, one for each object category, and keep them in a dictionary keyed by label.
4. Read the test samples from the testing folder and organize them the same way:
| desc | label |
| ---- | ----- |
| desc | car |
| desc | moto |
5. For each test sample:
- score the sample with each of the three HMMs;
- take the category of the highest-scoring model as the predicted class.
```python
import os
import numpy as np
import cv2 as cv
import hmmlearn.hmm as hl


def search_files(directory):
    """Collect the .jpg files under directory, grouped by subfolder name (the label)."""
    directory = os.path.normpath(directory)
    objects = {}
    for curdir, subdirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.jpg'):
                label = curdir.split(os.path.sep)[-1]
                if label not in objects:
                    objects[label] = []
                path = os.path.join(curdir, file)
                objects[label].append(path)
    return objects


# load the training samples and train one model per category
train_objects = search_files('../machine_learning_date/objects/training')
train_x, train_y = [], []
for label, filenames in train_objects.items():
    descs = np.array([])
    for filename in filenames:
        image = cv.imread(filename)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        # rescale so every descriptor matrix has a comparable number of rows
        h, w = gray.shape[:2]
        f = 200 / min(h, w)
        gray = cv.resize(gray, None, fx=f, fy=f)
        sift = cv.xfeatures2d.SIFT_create()      # create a SIFT keypoint detector
        keypoints = sift.detect(gray)            # detect all keypoints in the gray image
        _, desc = sift.compute(gray, keypoints)  # build the descriptor matrix
        if len(descs) == 0:
            descs = desc
        else:
            descs = np.append(descs, desc, axis=0)
    train_x.append(descs)
    train_y.append(label)
models = {}
for descs, label in zip(train_x, train_y):
    model = hl.GaussianHMM(n_components=4, covariance_type='diag', n_iter=100)
    models[label] = model.fit(descs)

# test the models
test_objects = search_files('../machine_learning_date/objects/testing')
test_x, test_y = [], []
for label, filenames in test_objects.items():
    descs = np.array([])
    for filename in filenames:
        image = cv.imread(filename)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        sift = cv.xfeatures2d.SIFT_create()
        keypoints = sift.detect(gray)
        _, desc = sift.compute(gray, keypoints)
        if len(descs) == 0:
            descs = desc
        else:
            descs = np.append(descs, desc, axis=0)
    test_x.append(descs)
    test_y.append(label)

# score every test sample with every model; score() returns a
# log-likelihood, so the highest score wins
pred_y = []
for descs, test_label in zip(test_x, test_y):
    best_score, best_label = None, None
    for pred_label, model in models.items():
        score = model.score(descs)
        if (best_score is None) or (best_score < score):
            best_score = score
            best_label = pred_label
        print(test_label, '->', pred_label, score)
    pred_y.append(best_label)
# airplane -> airplane -373374.23370679974
# airplane -> car -374022.20182585815
# airplane -> motorbike -374127.46289302857
# car -> airplane -163638.3153800373
# car -> car -163691.52001099114
# car -> motorbike -164410.0557508754
# motorbike -> airplane -467472.6294620241
# motorbike -> car -470149.6143097087
# motorbike -> motorbike -464606.0040570249
print(test_y)  # ['airplane', 'car', 'motorbike']
print(pred_y)  # ['airplane', 'airplane', 'motorbike']
```
Face recognition
Face recognition differs from general image recognition in that it has to tell two different people apart.
Video capture
OpenCV can access a video-capture device (a webcam) to grab image frames.
Video capture API:
```python
import cv2 as cv

# open the video-capture device (camera 0)
video_capture = cv.VideoCapture(0)
# read one frame; read() returns (success_flag, frame)
frame = video_capture.read()[1]
cv.imshow('VideoCapture', frame)
# release the capture device
video_capture.release()
# destroy all OpenCV windows
cv.destroyAllWindows()
```
Example:
```python
import cv2 as cv

video_capture = cv.VideoCapture(0)  # open the capture device
while True:
    frame = video_capture.read()[1]  # read one frame
    cv.imshow('frame', frame)
    # refresh the image every 33 ms; key code 27 is Esc
    if cv.waitKey(33) == 27:
        break
video_capture.release()
cv.destroyAllWindows()
```
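read() also returns a success flag, and the frame is None whenever the camera fails to deliver, which would crash cv.imshow. A slightly more defensive version of the same loop, reusing video_capture from above:
```python
while True:
    ret, frame = video_capture.read()
    if not ret:  # the camera delivered no frame
        break
    cv.imshow('frame', frame)
    if cv.waitKey(33) == 27:  # Esc
        break
```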
Face localization
Haar cascade face localization
```python
import cv2 as cv

# build a Haar cascade face detector from its feature description file
fd = cv.CascadeClassifier('../data/haar/face.xml')
# detect every face region in the image
#   1.3: scaleFactor, how much the image is shrunk between detection scales
#   5:   minNeighbors, how many overlapping detections a candidate needs to be kept
# returns:
#   faces: a list of face rectangles [(l, t, w, h), (...), ...]
faces = fd.detectMultiScale(frame, 1.3, 5)
face = faces[0]  # the first face
# draw an ellipse around it; l, t, w, h come from the rectangle,
# and a, b are its half-width and half-height
cv.ellipse(
    frame,           # image
    (l + a, t + b),  # center of the ellipse
    (a, b),          # axis radii
    0,               # rotation angle of the ellipse
    0, 360,          # start angle, end angle
    (255, 0, 255),   # color
    2                # line width
)
```
Example:
```python
import cv2 as cv

# Haar cascade detectors for faces, eyes, and noses
fd = cv.CascadeClassifier('../../data/haar/face.xml')
ed = cv.CascadeClassifier('../../data/haar/eye.xml')
nd = cv.CascadeClassifier('../../data/haar/nose.xml')
vc = cv.VideoCapture(0)
while True:
    frame = vc.read()[1]
    faces = fd.detectMultiScale(frame, 1.3, 5)
    for l, t, w, h in faces:
        a, b = int(w / 2), int(h / 2)
        cv.ellipse(frame, (l + a, t + b), (a, b), 0, 0, 360, (255, 0, 255), 2)
        face = frame[t:t + h, l:l + w]
        eyes = ed.detectMultiScale(face, 1.3, 5)
        for el, et, ew, eh in eyes:
            a, b = int(ew / 2), int(eh / 2)
            cv.ellipse(face, (el + a, et + b), (a, b), 0, 0, 360, (0, 255, 0), 2)
        noses = nd.detectMultiScale(face, 1.3, 5)
        for nl, nt, nw, nh in noses:
            a, b = int(nw / 2), int(nh / 2)
            cv.ellipse(face, (nl + a, nt + b), (a, b), 0, 0, 360, (0, 255, 255), 2)
    cv.imshow('VideoCapture', frame)
    if cv.waitKey(33) == 27:
        break
vc.release()
cv.destroyAllWindows()
```
Face recognition
Simple face recognition with OpenCV's LBPH (Local Binary Pattern Histogram) recognizer:
- Read the sample image data and build the list of image paths.
- Read each image and crop each face with the Haar detector; the face data goes into train_x as training data. When building train_y, the labels Bob, Sala, and Roy are strings, so they need to be encoded with a LabelEncoder.
- Feed the training set to the LBPH face-recognition model for training.
- Read the test data and build its list of image paths.
- For each test image, crop the face in the same way and have the LBPH model predict its class.
- Output the results as images.
```python
# -*- coding: utf-8 -*-
import os
import numpy as np
import cv2 as cv
import sklearn.preprocessing as sp

# Haar cascade face detector
fd = cv.CascadeClassifier('../machine_learning_date/haar/face.xml')


def search_faces(directory):
    """Collect the .jpg files under directory, grouped by person name (the subfolder)."""
    directory = os.path.normpath(directory)
    faces = {}
    for curdir, subdirs, files in os.walk(directory):
        for jpeg in (file for file in files if file.endswith('.jpg')):
            path = os.path.join(curdir, jpeg)
            label = path.split(os.path.sep)[-2]
            if label not in faces:
                faces[label] = []
            faces[label].append(path)
    return faces


train_faces = search_faces('../machine_learning_date/faces/training')
# the labels are strings (Bob, Sala, Roy), so encode them as integers
codec = sp.LabelEncoder()
codec.fit(list(train_faces.keys()))
train_x, train_y = [], []
for label, filenames in train_faces.items():
    for filename in filenames:
        image = cv.imread(filename)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        faces = fd.detectMultiScale(gray, 1.1, 2, minSize=(100, 100))
        for l, t, w, h in faces:
            train_x.append(gray[t:t + h, l:l + w])
            train_y.append(codec.transform([label])[0])
train_y = np.array(train_y)
'''
Training-set layout:
    train_x   train_y
    -----------------
    | face  |   0   |
    | face  |   1   |
    | face  |   2   |
    | face  |   1   |
    -----------------
'''
# LBPH (Local Binary Pattern Histogram) face-recognition classifier
model = cv.face.LBPHFaceRecognizer_create()  # cv.face requires opencv-contrib-python
model.train(train_x, train_y)

# test
test_faces = search_faces('../machine_learning_date/faces/testing')
test_x, test_y, test_z = [], [], []
for label, filenames in test_faces.items():
    for filename in filenames:
        image = cv.imread(filename)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        faces = fd.detectMultiScale(gray, 1.1, 2, minSize=(100, 100))
        for l, t, w, h in faces:
            test_x.append(gray[t:t + h, l:l + w])
            test_y.append(codec.transform([label])[0])
            a, b = int(w / 2), int(h / 2)
            cv.ellipse(image, (l + a, t + b), (a, b), 0, 0, 360, (255, 0, 255), 2)
            test_z.append(image)
test_y = np.array(test_y)

pred_test_y = []
for face in test_x:
    pred_code = model.predict(face)[0]
    pred_test_y.append(pred_code)
print(codec.inverse_transform(test_y))
print(codec.inverse_transform(pred_test_y))

# show each test image with its true and predicted labels until Esc is pressed
escape = False
while not escape:
    for code, pred_code, image in zip(test_y, pred_test_y, test_z):
        label, pred_label = codec.inverse_transform([code, pred_code])
        text = '{} {} {}'.format(label, '==' if code == pred_code else '!=', pred_label)
        cv.putText(image, text, (10, 60), cv.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 6)
        cv.imshow('Recognizing...', image)
        if cv.waitKey(1000) == 27:
            escape = True
            break
```
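model.predict actually returns a (label, confidence) pair, and the code above keeps only the label. For LBPH the confidence is a distance, so lower means a closer match, and keeping it allows weak matches to be rejected. A small sketch; the threshold of 80 is an arbitrary assumption, not a value from the original:
```python
pred_code, confidence = model.predict(face)  # LBPH: lower confidence = closer match
if confidence < 80:  # hypothetical acceptance threshold
    print('recognized:', codec.inverse_transform([pred_code])[0], confidence)
else:
    print('match too weak, rejected')
```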