python專案實戰:基於opencv、pyaudio和ffmpeg的錄影小程式
一、前言
開發前的準備:
- 需要用到的第三方庫:opencv-python、wave、pyaudio、ffmpy3(這4個庫都可以通過pip工具直接安裝)
- 額外的工具(用來合成音訊和視訊):ffmpeg
ffmpeg下載地址:官網:https://www.ffmpeg.org/
Github地址:https://github.com/FFmpeg/FFmpeg
百度網盤連結:https://pan.baidu.com/s/1UBcoXy6v3XG8oz0GuMKd8w 提取碼:90ow
上面3個地址來源於此文章:https://www.cnblogs.com/Neeo/articles/11677715.html
參考:
此錄影小程式的程式碼參考了以下文章:
https://www.jianshu.com/p/a401466f3c9d
https://blog.csdn.net/pythonlaodi/article/details/109222790
https://cloud.tencent.com/developer/news/147840
https://www.cnblogs.com/Neeo/articles/11677715.html
https://ffmpy3.readthedocs.io/en/latest/examples.html
聲音的取樣頻率:https://baike.baidu.com/item/%E9%87%87%E6%A0%B7%E9%A2%91%E7%8E%87/1494233
二、過程分析
- 採用多執行緒的方式開始錄製畫面和錄製聲音:
  執行緒一:呼叫opencv開啟攝像頭並把畫面儲存至視訊
  執行緒二:呼叫pyaudio開啟麥克風進行錄音
- 呼叫wave模組將音訊資料儲存至wav檔案。
- 用ffmpeg將音訊和視訊合成為一個視訊。
1.呼叫opencv開啟攝像頭並把畫面儲存至視訊
# 初始化攝像頭
self.cap = cv.VideoCapture(1)
if not self.cap.isOpened():
print('攝像頭開啟失敗')
# 初始化 cv.VideoWriter()類,用來儲存視訊
w, h = int(self.cap.get(cv.CAP_PROP_FRAME_WIDTH)), int(self.cap.get(cv.CAP_PROP_FRAME_HEIGHT))
fourCC = cv.VideoWriter_fourcc('X', 'V', 'I', 'D')
self.out = cv.VideoWriter(filename+'.avi', fourCC, fps=30, frameSize=(w, h))
這裡要注意的是在cv.VideoWriter()類例項化的時候,fps設為30。fps指的是儲存視訊的幀率大小,剛開始我設定為20的時候,發現最後錄製的畫面時長比音訊的時長要長,並且音訊和視訊是對不上的。如果你們也出現這種狀況,嘗試修改一下fps的值。
開始錄製畫面,當按下鍵盤s的時候結束錄製:
while True:
isOpened,frame = self.cap.read()
if not isOpened:
break
self.out.write(frame)
cv.imshow('frame',frame)
if cv.waitKey(1) == ord('s'):
self.flag_read = False
break
2.呼叫pyaudio開啟麥克風進行錄音
self.NUM_SAMPLES = 1000 # 錄音時緩衝區的幀數。
self.my_buf = b'' # 用bytes型別儲存音訊內容
pa = PyAudio()
self.stream = pa.open(format=paInt16, channels=self.channels, rate=self.framerate, input=True,
frames_per_buffer=self.NUM_SAMPLES)
# 引數frames_per_buffer:Specifies the number of frames per buffer.指定每個緩衝區的幀數。
def collect_mp3(self):
"""
錄音
"""
while self.flag_read:
string_audio_data = self.stream.read(self.NUM_SAMPLES)
self.my_buf += string_audio_data
self.stream.close()
3.呼叫wave模組將音訊資料儲存至wav檔案。
self.framerate = 32000 # 取樣頻率
self.channels = 1 # 聲道
self.sampWidth = 2 # 量化位數(byte單位)
def save_wave_file(self):
"""
寫入檔案
"""
with wave.open(self.filename + '.wav', 'wb') as wf:
wf.setnchannels(self.channels)
wf.setsampwidth(self.sampWidth)
wf.setframerate(self.framerate)
wf.writeframes(self.my_buf)
print('音訊寫入完成.')
4.用ffmpeg將音訊和視訊合成為一個視訊。
from ffmpy3 import FFmpeg
FFmpeg(inputs={f'{self.filename}.avi':None, f'{self.filename}.wav':None},
outputs={f'{self.filename}.mp4':'-c:v h264 -c:a ac3'}).run()
ffmpy3是用來驅動ffmpeg程式的,所以電腦上一定要先安裝ffmpeg,安裝完成之後最好將ffmpeg.exe檔案所在路徑新增到系統環境變數。
ffmpeg的下載地址在文章開頭。
三、完整程式碼
# Author:FuJLiny
# CSDN blog homepage:https://blog.csdn.net/FujLiny
# ------version 1-1,Update time:2020/12/6------
import os
import threading
import time
import wave
from pyaudio import PyAudio,paInt16
import cv2 as cv
from ffmpy3 import FFmpeg
class VCR:
    """Record webcam video and microphone audio on parallel threads,
    then mux the two tracks into one MP4 with ffmpeg (via ffmpy3).

    Workflow: ``runMain()`` starts the video thread (``collect_mp4``),
    which in turn starts the audio thread (``collect_mp3``); when the
    user presses 's' (or the camera fails) recording stops, the audio
    is written to a .wav file and ffmpeg merges .avi + .wav -> .mp4.
    """

    def __init__(self, filename, cam_index=1, fps=30):
        """
        :param filename: output base name, without extension
        :param cam_index: OpenCV camera device index (default 1 keeps the
                          original behavior; most machines use 0)
        :param fps: frame rate of the saved video. Must roughly match the
                    camera's real capture rate, otherwise the video track
                    drifts against the audio track.
        """
        self.filename = filename
        # Remove a stale result so ffmpeg does not fail/prompt on overwrite.
        if os.path.exists(filename + '.mp4'):
            os.remove(filename + '.mp4')
        self.framerate = 32000   # audio sampling rate in Hz
        self.NUM_SAMPLES = 1000  # audio frames read per buffer
        self.channels = 1        # mono recording
        self.sampWidth = 2       # sample width in bytes (16-bit PCM)
        self.my_buf = b''        # accumulated raw audio bytes
        self.flag_read = True    # cleared to stop the audio thread
        self.cap = cv.VideoCapture(cam_index)
        if not self.cap.isOpened():
            print('攝像頭開啟失敗')
        w = int(self.cap.get(cv.CAP_PROP_FRAME_WIDTH))
        h = int(self.cap.get(cv.CAP_PROP_FRAME_HEIGHT))
        fourCC = cv.VideoWriter_fourcc('X', 'V', 'I', 'D')
        self.out = cv.VideoWriter(filename + '.avi', fourCC, fps=fps, frameSize=(w, h))
        # Keep the PyAudio instance so it can be terminate()d after use
        # (the original leaked it as a local variable).
        self._pa = PyAudio()
        self.stream = self._pa.open(format=paInt16, channels=self.channels,
                                    rate=self.framerate, input=True,
                                    frames_per_buffer=self.NUM_SAMPLES)

    def save_wave_file(self):
        """Write the accumulated audio buffer to ``<filename>.wav``."""
        with wave.open(self.filename + '.wav', 'wb') as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(self.sampWidth)
            wf.setframerate(self.framerate)
            wf.writeframes(self.my_buf)
        print('音訊寫入完成.')

    def collect_mp3(self):
        """Audio thread: read from the mic until ``flag_read`` is cleared,
        then release the stream and the PyAudio instance."""
        while self.flag_read:
            self.my_buf += self.stream.read(self.NUM_SAMPLES)
        self.stream.close()
        self._pa.terminate()

    def collect_mp4(self):
        """Video thread: preview until SPACE is pressed, then record frames
        until 's' is pressed or the camera stops delivering frames.

        Bug fix vs. original: ``flag_read`` is now cleared on EVERY exit
        path (``finally``), so a failed camera read no longer leaves the
        audio thread and ``runMain()`` waiting forever.
        """
        font = cv.FONT_HERSHEY_COMPLEX
        txt = 'Video start when you press the space key.'
        try:
            # Preview phase: show frames (not written) until SPACE.
            while True:
                ok, frame = self.cap.read()
                if not ok:
                    return
                cv.putText(frame, txt, (30, 150), font, 0.8, (0, 0, 255), 1)
                cv.imshow('frame', frame)
                # Mask to 8 bits: waitKey may carry extra bits on some platforms.
                if cv.waitKey(1) & 0xFF == ord(' '):
                    break
            print('畫面錄製開始……')
            threading.Thread(target=self.collect_mp3).start()
            # Recording phase: write every frame until 's' or camera failure.
            while True:
                ok, frame = self.cap.read()
                if not ok:
                    break
                self.out.write(frame)
                cv.imshow('frame', frame)
                if cv.waitKey(1) & 0xFF == ord('s'):
                    # 's' ends the recording
                    break
        finally:
            # Always stop the audio thread and free camera/writer resources.
            self.flag_read = False
            self.out.release()
            self.cap.release()
            cv.destroyAllWindows()

    def runMain(self):
        """Entry point: run the recorder, save the audio, merge with ffmpeg."""
        threading.Thread(target=self.collect_mp4).start()
        # Wait until recording has finished (flag cleared by collect_mp4).
        while self.flag_read:
            time.sleep(1)
        print('準備將音訊寫入檔案……')
        self.save_wave_file()
        print('準備將音訊和視訊進行合成……')
        FFmpeg(inputs={f'{self.filename}.avi': None, f'{self.filename}.wav': None},
               outputs={f'{self.filename}.mp4': '-c:v h264 -c:a ac3'}).run()
        print('合成成功,程式結束。')
if __name__ == '__main__':
    # Script entry point: record under base name 'my_audio'
    # (produces my_audio.avi, my_audio.wav and the merged my_audio.mp4).
    recorder = VCR(filename='my_audio')
    recorder.runMain()