RTP解析音視訊幀
阿新 • • 發佈:2018-11-28
RTP解析音視訊幀
RTP解析H264、AAC負載
RTSP中音視訊是通過RTP傳輸的,本文記錄從RTP解析出H264、AAC的過程。
協議介紹可參考 https://blog.csdn.net/lostyears/article/details/51374997
拿到RTP資料後,先去除12位元組RTP頭部,然後進行下面處理。
解析H264
資料較大的H264包,需要進行RTP分片傳送。
實現程式碼:
/*
 * Extracts an H264 NAL unit from one RTP payload (RTP header already stripped).
 *
 * @par pBufIn RTP payload to parse (without the 12-byte RTP header)
 * nLenIn payload length in bytes
 * pBufOut output buffer for H264 (caller-allocated; must hold at least
 * nLenIn + 5 bytes: payload plus start code and rebuilt NAL header)
 * nLenOut length of one complete H264 frame; valid only when true is returned
 *
 * @return true one complete NAL unit is available in pBufOut
 * false the fragmented NAL unit is not finished yet
 */
bool UnpackRtpH264(const UInt8 *pBufIn, const Int32 nLenIn, UInt8 *pBufOut, Int32& nLenOut)
{
    bool bFinished = true;
    do
    {
        nLenOut = 0;
        const Int32 eFrameType = pBufIn[0] & 0x1F;
        if (eFrameType >= 1 && eFrameType <= 23) // single NAL unit packet
        {
            // Bounds check added for consistency with the continuation branch below.
            Assert(nLenIn + 4 <= MAX_FRAME_SISE);
            pBufOut[0] = 0x00; // prepend the 4-byte Annex-B start code
            pBufOut[1] = 0x00;
            pBufOut[2] = 0x00;
            pBufOut[3] = 0x01;
            memcpy(pBufOut + 4, pBufIn, nLenIn);
            nLenOut = nLenIn + 4;
        }
        else // fragmented NAL unit (FU): one NAL spread across several RTP packets
        {
            // NOTE(review): every type outside 1..23 falls in here, but only FU
            // packets (type 28/29) carry the S/E bits read below; STAP/MTAP
            // aggregation packets (types 24..27) would be misparsed — confirm
            // the sources never send them.
            bFinished = false;
            if (pBufIn[1] & 0x80) // FU header S bit: first fragment
            {
                Assert(nLenIn + 3 <= MAX_FRAME_SISE); // bounds check added
                m_nH264FrmeSize = 0;
                pBufOut[0] = 0x00; // start code
                pBufOut[1] = 0x00;
                pBufOut[2] = 0x00;
                pBufOut[3] = 0x01;
                // Rebuild the NAL header: F/NRI bits come from the FU indicator,
                // the NAL type from the FU header.
                pBufOut[4] = ((pBufIn[0] & 0xe0) | (pBufIn[1] & 0x1f));
                memcpy(pBufOut + 5, pBufIn + 2, nLenIn - 2); // skip the 2-byte FU indicator/header
                m_nH264FrmeSize = nLenIn + 5 - 2;
            }
            else // continuation / final fragment
            {
                Assert(m_nH264FrmeSize + nLenIn - 2 <= MAX_FRAME_SISE);
                memcpy(pBufOut + m_nH264FrmeSize, pBufIn + 2, nLenIn - 2); // skip the 2-byte FU indicator/header
                m_nH264FrmeSize += nLenIn - 2;
                if (pBufIn[1] & 0x40) // FU header E bit: last fragment
                {
                    nLenOut = m_nH264FrmeSize;
                    m_nH264FrmeSize = 0;
                    bFinished = true;
                }
            }
        }
    } while (0);
    return bFinished;
}
解析AAC
這裡要注意,一個RTP包可能包含多個AAC幀。之前按網上找的資料,假設RTP頭之後直接負載1幀AAC,大部分場景沒問題;後面有個輸入源解析AAC後沒聲音,最後發現是一個RTP包含了多個AAC負載。解析協議最好還是花時間研讀協議規範文件:規範文件比較系統全面,網上東拼西湊的部落格介紹可能不夠完整,導致部分場景失效。
實現程式碼如下:
/*
* @par pBufIn 待解析RTP(不包含12位元組頭)
* nLenIn 載荷長度
* pBufOut 裝載AAC的buf(外部傳入,分配空間不小於nLenIn)
* nLenOut 一幀AAC資料長度,函式返回true時有效
*
* 注:可能一個RTP包中包含多個AAC幀,是通過AU_HEADER_LENGTH(除以8得幀個數)來判斷
*
* @return true 一幀結束
* false 分片未結束
*/
bool UnpackRtpAAC(const UInt8 * pBufIn, const Int32 nLenIn, UInt8* pBufOut, Int32& nLenOut)
{
bool bFinished = true;
do
{
nLenOut = 0;
Int32 nAuHeaderOffset = 0;//查詢頭部的偏移,每次2位元組
const UInt16 AU_HEADER_LENGTH = (((pBufIn[nAuHeaderOffset] << 8) | pBufIn[nAuHeaderOffset + 1]) >> 4);//首2位元組表示Au-Header的長度,單位bit,所以除以16得到Au-Header位元組數
nAuHeaderOffset += 2;
Assert(nLenIn > (2 + AU_HEADER_LENGTH*2));
vector<UInt32 > vecAacFrameLen[AU_HEADER_LENGTH];
for (int i = 0; i < AU_HEADER_LENGTH; ++i)
{
const UInt16 AU_HEADER = ((pBufIn[nAuHeaderOffset] << 8) | pBufIn[nAuHeaderOffset + 1]);//之後的2位元組是AU_HEADER
UInt32 nAac = (AU_HEADER >> 3);//其中高13位表示一幀AAC負載的位元組長度,低3位無用
vecAacFrameLen->push_back(nAac);
nAuHeaderOffset += 2;
}
const UInt8 *pAacPayload = pBufIn + nAuHeaderOffset;//真正AAC負載開始處
UInt32 nAacPayloadOffset = 0;
for (int j = 0; j < AU_HEADER_LENGTH; ++j)
{
const UInt32 nAac = vecAacFrameLen->at(j);
//生成ADTS頭
SAacParam param(nAac, m_AudioInfo.nSample, m_AudioInfo.nChannel);
CADTS adts;
adts.Init(param);
//寫入ADTS頭
memcpy(pBufOut + nLenOut, adts.GetBuf(), adts.GetBufSize());
nLenOut += adts.GetBufSize();
//寫入AAC負載
memcpy(pBufOut + nLenOut, pAacPayload + nAacPayloadOffset, nAac);
nLenOut += nAac;
nAacPayloadOffset += nAac;
}
Assert((nLenIn - nAuHeaderOffset) == nAacPayloadOffset);
} while (0);
return bFinished;
}
封裝AAC的ADTS頭部
CADTS.h
// Fallback min/max macros — defined only when absent (some platform headers,
// e.g. <windows.h>, already provide them).
#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif
#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif
#define BYTE_NUMBIT 8 /* bits in byte (char) */
#define N_ADTS_SIZE 7 /* ADTS header length in bytes (protection_absent == 1, no CRC) */
/*
 * AAC profile, as encoded in the 2-bit ADTS "profile" field.
 */
enum eAACProfile
{
E_AAC_PROFILE_MAIN_PROFILE = 0,
E_AAC_PROFILE_LC,
E_AAC_PROFILE_SSR,
E_AAC_PROFILE_PROFILE_RESERVED,
};
/*
 * ADTS sampling_frequency_index values (4-bit field). The enum value is the
 * index written into the header, not the sampling rate itself.
 */
enum eAACSample
{
E_AAC_SAMPLE_96000_HZ = 0,
E_AAC_SAMPLE_88200_HZ,
E_AAC_SAMPLE_64000_HZ,
E_AAC_SAMPLE_48000_HZ,
E_AAC_SAMPLE_44100_HZ,
E_AAC_SAMPLE_32000_HZ,
E_AAC_SAMPLE_24000_HZ,
E_AAC_SAMPLE_22050_HZ,
E_AAC_SAMPLE_16000_HZ,
E_AAC_SAMPLE_12000_HZ,
E_AAC_SAMPLE_11025_HZ,
E_AAC_SAMPLE_8000_HZ,
E_AAC_SAMPLE_7350_HZ,
E_AAC_SAMPLE_RESERVED,
};
/*
 * ADTS channel_configuration values (3-bit field). The enum is dense, so
 * E_AAC_CHANNEL_8 == 7 — which matches ADTS, where configuration 7 denotes
 * an 8-channel (7.1) layout; there is no dedicated 7-channel configuration.
 */
enum eAACChannel
{
E_AAC_CHANNEL_SPECIFC_CONFIG = 0,
E_AAC_CHANNEL_MONO,
E_AAC_CHANNEL_STEREO,
E_AAC_CHANNEL_TRIPLE_TRACK,
E_AAC_CHANNEL_4,
E_AAC_CHANNEL_5,
E_AAC_CHANNEL_6,
E_AAC_CHANNEL_8,
E_AAC_CHANNEL_RESERVED,
};
/*
 * ADTS "ID" bit: 0 = MPEG-4, 1 = MPEG-2.
 */
enum eMpegId
{
E_MPEG4 = 0,
E_MPEG_2
};
/*
 * Parameters needed to build one ADTS header.
 * ("Playod" is a long-standing typo for "payload"; the name is kept so
 * existing callers keep compiling.)
 */
struct SAacParam
{
SAacParam(UInt32 playod, Int32 sample, Int32 channel = 1, eAACProfile profile = E_AAC_PROFILE_LC, eMpegId id = E_MPEG4)
:eId(id), eProfile(profile), nChannel(channel), nSample(sample), nPlayod(playod)
{
};
eMpegId eId; // MPEG-4 / MPEG-2 selector (ADTS ID bit)
eAACProfile eProfile; // AAC profile
Int32 nChannel; // channel count (e.g. 1 = mono, 2 = stereo)
Int32 nSample; // sampling rate in Hz
UInt32 nPlayod;//AAC payload size in bytes (excluding the ADTS header)
};
/*
 * Builder for the 7-byte AAC ADTS header.
 */
class CADTS
{
public:
CADTS();
public:
/*
 * Fills the ADTS header from the given AAC parameters.
 */
void Init(const SAacParam& aacHead);
/*
 * Returns the address of the ADTS header buffer.
 */
UInt8* GetBuf();
/*
 * Returns the ADTS header length in bytes.
 */
UInt32 GetBufSize() const ;
private:
int PutBit(UInt32 data, int numBit);
int WriteByte(UInt32 data, int numBit);
/*
 * Maps a sampling rate in Hz to its ADTS sampling_frequency_index.
 */
static eAACSample GetSampleIndex(const UInt32 nSample);
/*
 * Maps a channel count to its ADTS channel_configuration index.
 */
static eAACChannel GetChannelIndex(const UInt32 nChannel);
private:
UInt8 m_pBuf[N_ADTS_SIZE]; // header byte buffer
const UInt32 m_nBit; // total capacity in bits (N_ADTS_SIZE * 8)
UInt32 m_curBit; // current write position in bits
};
CADTS.cpp
// Zero-initializes the header buffer and places the bit cursor at 0;
// capacity is N_ADTS_SIZE bytes (7) = 56 bits.
CADTS::CADTS():m_pBuf(),m_nBit(BYTE_NUMBIT*N_ADTS_SIZE),m_curBit(0)
{
}
/*
 * Fills the fixed and variable parts of the 7-byte ADTS header.
 * The bit layout follows the ADTS specification, so the PutBit calls
 * below must stay in exactly this order.
 */
void CADTS::Init(const SAacParam &aacHead)
{
/* Fixed ADTS header */
PutBit(0xFFFF, 12);// 12 bit Syncword
PutBit(aacHead.eId, 1); //ID == 0 for MPEG4 AAC, 1 for MPEG2 AAC
PutBit(0, 2); //layer == 0
PutBit(1, 1); //protection absent (no CRC -> 7-byte header)
PutBit(aacHead.eProfile, 2); //profile
PutBit(CADTS::GetSampleIndex(aacHead.nSample), 4); //sampling rate index
PutBit(0, 1); //private bit
PutBit(CADTS::GetChannelIndex(aacHead.nChannel), 3); //channel configuration
PutBit(0, 1); //original/copy
PutBit(0, 1); // home
/* Variable ADTS header */
PutBit(0, 1); // copyr. id. bit
PutBit(0, 1); // copyr. id. start
PutBit(GetBufSize() + aacHead.nPlayod, 13); //frame length = ADTS header + raw AAC payload
PutBit(0x7FF, 11); // buffer fullness (0x7FF for VBR)
PutBit(0 ,2); //raw data blocks (0+1=1)
}
/*
 * Returns a pointer to the first byte of the ADTS header buffer.
 */
UInt8 *CADTS::GetBuf()
{
    return &m_pBuf[0];
}
/*
 * Returns the ADTS header size in bytes (total bit capacity / 8).
 */
UInt32 CADTS::GetBufSize() const
{
    const UInt32 nBytes = m_nBit / BYTE_NUMBIT;
    return nBytes;
}
/*
 * Writes the low `numBit` bits of `data` into the header buffer,
 * most-significant bit first, continuing at the current bit cursor.
 * Returns 0 on success, 1 if the underlying byte write reports failure.
 */
int CADTS::PutBit(UInt32 data, int numBit)
{
int num,maxNum,curNum;
unsigned long bits;
if (numBit == 0)
return 0;
/* write bits in packets according to buffer byte boundaries */
num = 0;
maxNum = BYTE_NUMBIT - m_curBit % BYTE_NUMBIT; // bits still free in the current byte
while (num < numBit) {
curNum = min(numBit-num,maxNum); // chunk that fits in the current byte
bits = data>>(numBit-num-curNum); // shift so the chunk sits in the low bits
if (WriteByte(bits, curNum)) {
return 1;
}
num += curNum;
maxNum = BYTE_NUMBIT; // subsequent chunks start byte-aligned
}
return 0;
}
/*
 * Writes up to one byte's worth of bits at the bit cursor; the caller
 * guarantees numBit does not cross a byte boundary. Always returns 0.
 */
int CADTS::WriteByte(UInt32 data, int numBit)
{
long numUsed,idx;
idx = (m_curBit / BYTE_NUMBIT) % N_ADTS_SIZE; // target byte (index wraps at buffer size)
numUsed = m_curBit % BYTE_NUMBIT; // bits already occupied in that byte
#ifndef DRM
// Clear the byte on first write into it. The DRM build skips the clear —
// presumably inherited from the FAAC bitstream code this derives from,
// where DRM mode preserves existing buffer contents; TODO confirm.
if (numUsed == 0)
m_pBuf[idx] = 0;
#endif
m_pBuf[idx] |= (data & ((1<<numBit)-1)) << (BYTE_NUMBIT-numUsed-numBit); // mask chunk, shift into position
m_curBit += numBit;
return 0;
}
/*
 * Maps a sampling rate in Hz to its ADTS sampling_frequency_index.
 *
 * @par nSample sampling rate in Hz
 * @return matching index, or E_AAC_SAMPLE_RESERVED for unsupported rates.
 *
 * The original lazily filled a function-local static std::map under an
 * "if (empty)" check — not thread-safe — and then did two lookups
 * (find + operator[]). A switch is race-free, allocation-free, and needs
 * one comparison chain.
 */
eAACSample CADTS::GetSampleIndex(const UInt32 nSample)
{
    switch (nSample)
    {
    case 96000: return E_AAC_SAMPLE_96000_HZ;
    case 88200: return E_AAC_SAMPLE_88200_HZ;
    case 64000: return E_AAC_SAMPLE_64000_HZ;
    case 48000: return E_AAC_SAMPLE_48000_HZ;
    case 44100: return E_AAC_SAMPLE_44100_HZ;
    case 32000: return E_AAC_SAMPLE_32000_HZ;
    case 24000: return E_AAC_SAMPLE_24000_HZ;
    case 22050: return E_AAC_SAMPLE_22050_HZ;
    case 16000: return E_AAC_SAMPLE_16000_HZ;
    case 12000: return E_AAC_SAMPLE_12000_HZ;
    case 11025: return E_AAC_SAMPLE_11025_HZ;
    case 8000:  return E_AAC_SAMPLE_8000_HZ;
    case 7350:  return E_AAC_SAMPLE_7350_HZ;
    default:    return E_AAC_SAMPLE_RESERVED;
    }
}
/*
 * Maps a channel count to its ADTS channel_configuration index.
 *
 * @par nChannel number of audio channels (0 = config carried in-band)
 * @return matching index, or E_AAC_CHANNEL_RESERVED for unsupported counts
 * (note: 7 is unsupported; 8 channels map to configuration 7).
 *
 * Same fix as GetSampleIndex: the original's lazily-filled static map was
 * not thread-safe and performed two lookups; a switch avoids both issues.
 */
eAACChannel CADTS::GetChannelIndex(const UInt32 nChannel)
{
    switch (nChannel)
    {
    case 0:  return E_AAC_CHANNEL_SPECIFC_CONFIG;
    case 1:  return E_AAC_CHANNEL_MONO;
    case 2:  return E_AAC_CHANNEL_STEREO;
    case 3:  return E_AAC_CHANNEL_TRIPLE_TRACK;
    case 4:  return E_AAC_CHANNEL_4;
    case 5:  return E_AAC_CHANNEL_5;
    case 6:  return E_AAC_CHANNEL_6;
    case 8:  return E_AAC_CHANNEL_8;
    default: return E_AAC_CHANNEL_RESERVED;
    }
}
採坑心得
1、協議解析優先考慮成熟的開原始碼,例如ffmpeg,流媒體相關的協議裡面基本都有實現;
2、如果找不到成熟開原始碼做參考,搜尋協議規範文件,不復雜的話照著文件一步步做吧,規範文件比較系統全面,比網上東拼西湊找的靠譜,最後花的時間可能比亂搜一通要少,而且自己解析印象更深刻。