WebRTC: NetEQ原始碼分析
隨著WebRTC版本的不斷更新,內部程式碼結構也有了很大的變化,但是對於底層的音視訊引擎來講,卻沒有什麼太大的變化,因為引擎在很早之前就已經達到了一個非常成熟穩定的狀態(早期的QQ音訊就已經使用了GIPS的NetEQ)。讀研期間我就研究過NetEQ中的一些理論,不過那時候研究得很淺,主要是通過查詢各種資料來學習NetEQ,其實我也很奇怪,如此大名鼎鼎的技術,並且已經開源了,為什麼網上研究它的資料少之又少,除了西安電子科技大學吳江銳的那篇碩士論文,基本上就沒有任何有價值的資料了。估計自己以後還會持續地研究NetEQ,所以準備一邊研究一邊記錄。
NetEQ是GIPS公司的核心音訊引擎技術,後來GIPS公司被Google收購,這項技術落到了Google手裡並隨著WebRTC的開源一起被公之於眾。NetEQ是從接收端來處理語音包的,主要的功能是抖動消除,丟包隱藏,在網路延遲大的時候降低丟包率,在網路條件好的時候減小時延。
M66版本的WebRTC中NetEQ程式碼介面的定義在src/modules/audio_coding/neteq/include/neteq.h標頭檔案中,而具體的實現是在src/modules/audio_coding/neteq/neteq_impl.cc檔案中,neteq.h檔案中定義了相當多的介面,但其實最重要的介面就只有兩個,第一個是向NetEQ模組中插入解析過後的從網路中來到的RTP資料包,第二個是從NetEQ模組中取解碼過後的pcm音訊資料。所以其實我們可以很簡單的將NetEQ看成是一個黑盒,我們往裡面扔網路中接收到的RTP資料包,它會給我們吐出解碼或者經過其它處理過後的pcm音訊資料,然後我們拿去播放,至於中間的一些過程,例如抖動消除,解碼,丟包隱藏,語音的拉伸和壓縮以及它們之間如何配合,都是我們可以不用去關心的,這其實也就是NetEQ真正的價值所在。
neteq.h中的兩個重要函式分別是InsertPacket()和GetAudio(),在neteq_impl.cc中的具體實現分別是InsertPacketInternal()和GetAudioInternal(),與前面的塞包和取包的描述一一對應。下面主要介紹這兩個函式的主要流程,這兩個函式的流程也就體現了NetEQ整個處理語音包的流程。
int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header, rtc::ArrayView<const uint8_t> payload, uint32_t receive_timestamp) { if (payload.empty()) { RTC_LOG_F(LS_ERROR) << "payload is empty"; return kInvalidPointer; } PacketList packet_list; // Insert packet in a packet list. packet_list.push_back([&rtp_header, &payload] { // Convert to Packet. Packet packet; packet.payload_type = rtp_header.payloadType; packet.sequence_number = rtp_header.sequenceNumber; packet.timestamp = rtp_header.timestamp; packet.payload.SetData(payload.data(), payload.size()); // Waiting time will be set upon inserting the packet in the buffer. RTC_DCHECK(!packet.waiting_time); return packet; }()); bool update_sample_rate_and_channels = first_packet_ || (rtp_header.ssrc != ssrc_); if (update_sample_rate_and_channels) { // Reset timestamp scaling. timestamp_scaler_->Reset(); } // RTC_LOG(LS_WARNING) << "external timestamp = " << rtp_header.timestamp; if (!decoder_database_->IsRed(rtp_header.payloadType)) { // Scale timestamp to internal domain (only for some codecs). timestamp_scaler_->ToInternal(&packet_list); // RTC_LOG(LS_WARNING) << "internal timestamp = " << packet_list.front().timestamp; } // Store these for later use, since the first packet may very well disappear // before we need these values. uint32_t main_timestamp = packet_list.front().timestamp; uint8_t main_payload_type = packet_list.front().payload_type; uint16_t main_sequence_number = packet_list.front().sequence_number; // Reinitialize NetEq if it's needed (changed SSRC or first call). if (update_sample_rate_and_channels) { // Note: |first_packet_| will be cleared further down in this method, once // the packet has been successfully inserted into the packet buffer. rtcp_.Init(rtp_header.sequenceNumber); // Flush the packet buffer and DTMF buffer. packet_buffer_->Flush(); dtmf_buffer_->Flush(); // Store new SSRC. ssrc_ = rtp_header.ssrc; // Update audio buffer timestamp. 
sync_buffer_->IncreaseEndTimestamp(main_timestamp - timestamp_); // Update codecs. timestamp_ = main_timestamp; } // Update RTCP statistics, only for regular packets. rtcp_.Update(rtp_header, receive_timestamp); if (nack_enabled_) { RTC_DCHECK(nack_); if (update_sample_rate_and_channels) { nack_->Reset(); } nack_->UpdateLastReceivedPacket(rtp_header.sequenceNumber, rtp_header.timestamp); } // Check for RED payload type, and separate payloads into several packets. if (decoder_database_->IsRed(rtp_header.payloadType)) { if (!red_payload_splitter_->SplitRed(&packet_list)) { return kRedundancySplitError; } // Only accept a few RED payloads of the same type as the main data, // DTMF events and CNG. red_payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_); } // Check payload types. if (decoder_database_->CheckPayloadTypes(packet_list) == DecoderDatabase::kDecoderNotFound) { return kUnknownRtpPayloadType; } RTC_DCHECK(!packet_list.empty()); // Update main_timestamp, if new packets appear in the list // after RED splitting. if (decoder_database_->IsRed(rtp_header.payloadType)) { timestamp_scaler_->ToInternal(&packet_list); main_timestamp = packet_list.front().timestamp; main_payload_type = packet_list.front().payload_type; main_sequence_number = packet_list.front().sequence_number; } // Process DTMF payloads. Cycle through the list of packets, and pick out any // DTMF payloads found. 
PacketList::iterator it = packet_list.begin(); while (it != packet_list.end()) { const Packet& current_packet = (*it); RTC_DCHECK(!current_packet.payload.empty()); if (decoder_database_->IsDtmf(current_packet.payload_type)) { DtmfEvent event; int ret = DtmfBuffer::ParseEvent(current_packet.timestamp, current_packet.payload.data(), current_packet.payload.size(), &event); if (ret != DtmfBuffer::kOK) { return kDtmfParsingError; } if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) { return kDtmfInsertError; } it = packet_list.erase(it); } else { ++it; } } // Update bandwidth estimate, if the packet is not comfort noise. if (!packet_list.empty() && !decoder_database_->IsComfortNoise(main_payload_type)) { // The list can be empty here if we got nothing but DTMF payloads. AudioDecoder* decoder = decoder_database_->GetDecoder(main_payload_type); RTC_DCHECK(decoder); // Should always get a valid object, since we have // already checked that the payload types are known. decoder->IncomingPacket(packet_list.front().payload.data(), packet_list.front().payload.size(), packet_list.front().sequence_number, packet_list.front().timestamp, receive_timestamp); } PacketList parsed_packet_list; while (!packet_list.empty()) { Packet& packet = packet_list.front(); const DecoderDatabase::DecoderInfo* info = decoder_database_->GetDecoderInfo(packet.payload_type); if (!info) { RTC_LOG(LS_WARNING) << "SplitAudio unknown payload type"; return kUnknownRtpPayloadType; } if (info->IsComfortNoise()) { // Carry comfort noise packets along. 
parsed_packet_list.splice(parsed_packet_list.end(), packet_list, packet_list.begin()); } else { const auto sequence_number = packet.sequence_number; const auto payload_type = packet.payload_type; const Packet::Priority original_priority = packet.priority; auto packet_from_result = [&](AudioDecoder::ParseResult& result) { Packet new_packet; new_packet.sequence_number = sequence_number; new_packet.payload_type = payload_type; new_packet.timestamp = result.timestamp; new_packet.priority.codec_level = result.priority; new_packet.priority.red_level = original_priority.red_level; new_packet.frame = std::move(result.frame); return new_packet; }; std::vector<AudioDecoder::ParseResult> results = info->GetDecoder()->ParsePayload(std::move(packet.payload), packet.timestamp); if (results.empty()) { packet_list.pop_front(); } else { bool first = true; for (auto& result : results) { RTC_DCHECK(result.frame); RTC_DCHECK_GE(result.priority, 0); if (first) { // Re-use the node and move it to parsed_packet_list. packet_list.front() = packet_from_result(result); parsed_packet_list.splice(parsed_packet_list.end(), packet_list, packet_list.begin()); first = false; } else { parsed_packet_list.push_back(packet_from_result(result)); } } } } } // Calculate the number of primary (non-FEC/RED) packets. const int number_of_primary_packets = std::count_if( parsed_packet_list.begin(), parsed_packet_list.end(), [](const Packet& in) { return in.priority.codec_level == 0; }); // Insert packets in buffer. const int ret = packet_buffer_->InsertPacketList( &parsed_packet_list, *decoder_database_, ¤t_rtp_payload_type_, ¤t_cng_rtp_payload_type_, &stats_); if (ret == PacketBuffer::kFlushed) { // Reset DSP timestamp etc. if packet buffer flushed. new_codec_ = true; update_sample_rate_and_channels = true; } else if (ret != PacketBuffer::kOK) { return kOtherError; } if (first_packet_) { first_packet_ = false; // Update the codec on the next GetAudio call. 
new_codec_ = true; } if (current_rtp_payload_type_) { RTC_DCHECK(decoder_database_->GetDecoderInfo(*current_rtp_payload_type_)) << "Payload type " << static_cast<int>(*current_rtp_payload_type_) << " is unknown where it shouldn't be"; } if (update_sample_rate_and_channels && !packet_buffer_->Empty()) { // We do not use |current_rtp_payload_type_| to |set payload_type|, but // get the next RTP header from |packet_buffer_| to obtain the payload type. // The reason for it is the following corner case. If NetEq receives a // CNG packet with a sample rate different than the current CNG then it // flushes its buffer, assuming send codec must have been changed. However, // payload type of the hypothetically new send codec is not known. const Packet* next_packet = packet_buffer_->PeekNextPacket(); RTC_DCHECK(next_packet); const int payload_type = next_packet->payload_type; size_t channels = 1; if (!decoder_database_->IsComfortNoise(payload_type)) { AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type); assert(decoder); // Payloads are already checked to be valid. channels = decoder->Channels(); } const DecoderDatabase::DecoderInfo* decoder_info = decoder_database_->GetDecoderInfo(payload_type); assert(decoder_info); if (decoder_info->SampleRateHz() != fs_hz_ || channels != algorithm_buffer_->Channels()) { SetSampleRateAndChannels(decoder_info->SampleRateHz(), channels); } if (nack_enabled_) { RTC_DCHECK(nack_); // Update the sample rate even if the rate is not new, because of Reset(). nack_->UpdateSampleRate(fs_hz_); } } // TODO(hlundin): Move this code to DelayManager class. const DecoderDatabase::DecoderInfo* dec_info = decoder_database_->GetDecoderInfo(main_payload_type); assert(dec_info); // Already checked that the payload type is known. delay_manager_->LastDecodedWasCngOrDtmf(dec_info->IsComfortNoise() || dec_info->IsDtmf()); if (delay_manager_->last_pack_cng_or_dtmf() == 0) { // Calculate the total speech length carried in each packet. 
if (number_of_primary_packets > 0) { const size_t packet_length_samples = number_of_primary_packets * decoder_frame_length_; if (packet_length_samples != decision_logic_->packet_length_samples()) { decision_logic_->set_packet_length_samples(packet_length_samples); delay_manager_->SetPacketAudioLength( rtc::dchecked_cast<int>((1000 * packet_length_samples) / fs_hz_)); } } // Update statistics. if ((int32_t)(main_timestamp - timestamp_) >= 0 && !new_codec_) { // Only update statistics if incoming packet is not older than last played // out packet, and if new codec flag is not set. delay_manager_->Update(main_sequence_number, main_timestamp, fs_hz_); } } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) { // This is first "normal" packet after CNG or DTMF. // Reset packet time counter and measure time until next packet, // but don't update statistics. delay_manager_->set_last_pack_cng_or_dtmf(0); delay_manager_->ResetPacketIatCount(); } return 0; }