webrtc 3A移植以及实时处理
文章目录
- 前言
- 一、交叉编译
- 1.Pulse Audio webrtc-audio-processing
- 2.交叉编译
- 二、基于alsa进行实时3A处理
- 1.demo源码
- 2.注意项
- 3.效果展示
- 总结
前言
由于工作需要,硬件3A中的AEC效果实在太差,后面使用SpeexDSP的软3A,效果依旧不是很好,猜测是内部时延估计和时延补偿做的并不是很好,于是采用了webrtc的3A算法,这里记录一下3A移植过程。
|版本声明:山河君,未经博主允许,禁止转载
一、交叉编译
1.Pulse Audio webrtc-audio-processing
在linux下,webrtc 3A是比较好移植的,原因是Pulse Audio是支持webrtc 3A插件的,也就是说,不需要我们自己翻墙下载配置webrtc的环境以及编译链路,Pulse Audio已经帮我们做好了这一步,剩下的就是交叉编译的工作。
对应的gitlab地址:Pulse Audio webrtc-audio-processing
2.交叉编译
先选择好版本,截至当前博客时间,最新版本是1.3.1,网上之前也有介绍的,基本都是0.3版本的
1.0版本和1.0之前的版本最大的区别就是编译器的不同,之前是通过脚本配置,1.0后都是使用meson 进行配置,所以需要下载meson 以及ninja(用于编译webrtc)
meson和代码下载之后,就需要配置交叉编译链路,meson对应交叉编译器环境需要我们自己写meson配置文件,在源文件目录下,打开cross_file.txt,内容如下:
[binaries]
c = '/usr/bin/aarch64-linux-gnu-gcc'
cpp = '/usr/bin/aarch64-linux-gnu-g++'
ar = '/usr/bin/aarch64-linux-gnu-ar'
strip = '/usr/bin/aarch64-linux-gnu-strip'
pkgconfig = '/usr/bin/aarch64-linux-gnu-pkg-config'

[host_machine]
system = 'linux'
cpu_family = 'aarch64'
cpu = 'armv8-a'
endian = 'little'

[paths]
prefix = '/home/aaron/workplace/webrtc-audio-processing/build'
在源文件目录下创建编译缓存目录以及安装目录
meson . build -Dprefix=$PWD/install --cross-file=cross_file.txt
ninja -C build
ninja -C build install
最后在install目录下可以看到编译好的文件
二、基于alsa进行实时3A处理
1.demo源码
编译好的完整demo下载:demo下载,如果没有积分的话就自己编译,这里只是少了demo的脚本
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <alsa/asoundlib.h>
#include <pthread.h>
#include <errno.h>
#include <mutex>
#include "modules/audio_processing/include/audio_processing.h"

// Real-time 3A (AEC/ANS/AGC) demo: plays a far-end PCM file through ALSA while
// capturing the near-end signal, and runs both through the WebRTC audio
// processing module in 10 ms frames at 16 kHz.

constexpr int sample_rate_hz = 16000;               // processing sample rate
constexpr size_t num_channels = 1;                  // mono processing path
constexpr size_t frame_size = sample_rate_hz / 100; // 10 ms frame = 160 samples

#define CHANNELS 2      // ALSA device opened in interleaved stereo
#define SAMPLE_RATE 16000
#define PERIOD_SIZE 160 // frames per period = one 10 ms block
#define BUFFER_SIZE (PERIOD_SIZE * 2)

// NOTE(review): shared run flag is read/written from three threads without
// atomics -- works in practice for a demo, but std::atomic<bool> would be safer.
bool gRun = true;

typedef struct
{
    snd_pcm_t *playback_handle;
    FILE *pcm_file;                               // far-end source file (mono S16LE)
    int8_t m_pPlayoutBuffer[PERIOD_SIZE * 2 * 2]; // stereo playout staging (bytes)
    int m_nPlayoutBufferSizeIn20MS = PERIOD_SIZE * 2 * 2;
    int m_nPlayoutFramesLeft = 0;                 // frames not yet written to ALSA

    snd_pcm_t *capture_handle;
    FILE *pcm_out_file;                           // raw near-end capture dump
    int8_t m_pRecordBuffer[PERIOD_SIZE * 2 * 2];  // stereo capture staging (bytes)
    int m_nRecordBufferSizeIn20MS = PERIOD_SIZE * 2 * 2;
    int m_nRecordingFramesLeft = PERIOD_SIZE;     // frames still needed for a full block

    FILE *pcm_3a_file;                            // 3A-processed output dump
    FILE *pcm_ref_file;                           // reference (far-end) dump
    int8_t m_pRefBuffer[PERIOD_SIZE * 2];         // mono far-end frame, in bytes
    int8_t m_p3ABuffer[PERIOD_SIZE * 2];          // processed mono frame, in bytes
    int8_t m_pNearBuffer[PERIOD_SIZE * 2];        // mono near-end frame, in bytes
    rtc::scoped_refptr<webrtc::AudioProcessing> apm; // WebRTC 3A instance
} audio_data_t;

// Duplicate each mono sample into both channels of an interleaved stereo frame.
// `size` is the number of mono samples.
void monoTurnStereo(const int16_t *pSrc, int16_t *pDts, size_t size)
{
    for (size_t j = 0; j < size; j++) // size_t index: avoids signed/unsigned compare
    {
        pDts[2 * j] = pSrc[j];
        pDts[2 * j + 1] = pSrc[j];
    }
}

// Keep only the left channel of an interleaved stereo S16LE byte stream.
// `size` is the input length in bytes (one stereo S16 frame = 4 bytes).
void stereoTurnMono(const unsigned char *pSrc, unsigned char *pDts, size_t size)
{
    int nLeftCount = 0;
    for (size_t i = 0; i < size; i += 4)
    {
        pDts[nLeftCount] = pSrc[i];         // left sample, low byte
        pDts[nLeftCount + 1] = pSrc[i + 1]; // left sample, high byte
        nLeftCount += 2;
    }
}

// Try to recover an ALSA stream from xrun/suspend errors (pattern adapted from
// the WebRTC ADM ALSA implementation). Returns 1 when a -EPIPE was recovered,
// 0 for other successful recoveries, negative on unrecoverable errors.
int32_t errorRecovery(int32_t nRet, snd_pcm_t *pDeviceHandle)
{
    int st = snd_pcm_state(pDeviceHandle);
    printf("Trying to recover from %s error: %s nRet:(%d) (state:%d)\n",
           ((snd_pcm_stream(pDeviceHandle) == SND_PCM_STREAM_CAPTURE) ? "capture"
                                                                      : "playout"),
           snd_strerror(nRet), nRet, st);

    int res = snd_pcm_recover(pDeviceHandle, nRet, 1);
    if (0 == res)
    {
        printf("Recovery - snd_pcm_recover OK\n");
        if ((nRet == -EPIPE || nRet == -ESTRPIPE) &&
            snd_pcm_stream(pDeviceHandle) == SND_PCM_STREAM_CAPTURE)
        {
            // Capture streams need an explicit start() to get data flowing again.
            // (renamed from a shadowing inner `nRet` in the original)
            int startRet = snd_pcm_start(pDeviceHandle);
            if (startRet != 0)
            {
                printf("Recovery - snd_pcm_start error: %d\n", startRet);
                return -1;
            }
        }
        if ((nRet == -EPIPE || nRet == -ESTRPIPE) &&
            snd_pcm_stream(pDeviceHandle) == SND_PCM_STREAM_PLAYBACK)
        {
            // Playback streams may additionally need prepare() before start().
            snd_pcm_state_t state = snd_pcm_state(pDeviceHandle);
            if (state != SND_PCM_STATE_PREPARED)
            {
                snd_pcm_prepare(pDeviceHandle);
            }
            int startRet = snd_pcm_start(pDeviceHandle);
            if (startRet != 0)
            {
                printf("Recovery - snd_pcm_start error: %s\n", snd_strerror(startRet));
                return -1;
            }
        }
        return -EPIPE == nRet ? 1 : 0;
    }
    else
    {
        printf("Unrecoverable alsa stream error: %d\n", res);
    }
    return res;
}

// Playback thread: reads 10 ms mono frames from the far-end file, upmixes them
// to stereo, writes them to the ALSA playback device, and dumps the mono
// reference signal used by the AEC. Loops the file on EOF.
void *playback_thread(void *arg)
{
    printf("playback_thread\n");
    audio_data_t *data = (audio_data_t *)arg;

    // Raise thread to near-top SCHED_FIFO priority to minimize playout jitter.
    // (fixed mojibake: original had `¶m` where `&param` belongs)
    const int policy = SCHED_FIFO;
    const int max_prio = sched_get_priority_max(policy);
    sched_param param;
    param.sched_priority = max_prio - 1;
    pthread_setschedparam(pthread_self(), policy, &param);

    while (gRun)
    {
        snd_pcm_sframes_t sndAvailFrames = snd_pcm_avail_update(data->playback_handle);
        if (sndAvailFrames < 0)
        {
            printf("playout snd_pcm_avail_update error: %s\n", snd_strerror(sndAvailFrames));
            errorRecovery(sndAvailFrames, data->playback_handle);
            continue;
        }
        else if (sndAvailFrames == 0)
        {
            // Device buffer is full; wait up to 2 ms for writable space.
            snd_pcm_wait(data->playback_handle, 2);
            continue;
        }

        if (data->m_nPlayoutFramesLeft <= 0)
        {
            // Refill: one 10 ms mono frame from the file, upmixed to stereo.
            size_t frames = fread(data->m_pRefBuffer, 2, PERIOD_SIZE, data->pcm_file);
            if (frames == 0 || frames != PERIOD_SIZE)
            {
                // End of file: restart the far-end signal from the beginning.
                fseek(data->pcm_file, 0, SEEK_SET);
                continue;
            }
            monoTurnStereo((int16_t *)data->m_pRefBuffer,
                           (int16_t *)data->m_pPlayoutBuffer, PERIOD_SIZE);
            data->m_nPlayoutFramesLeft = frames;
        }

        if ((uint32_t)sndAvailFrames > (uint32_t)data->m_nPlayoutFramesLeft)
        {
            sndAvailFrames = (uint32_t)data->m_nPlayoutFramesLeft;
        }

        // Write from the tail of the staging buffer: offset = total - bytes left.
        int size = snd_pcm_frames_to_bytes(data->playback_handle, data->m_nPlayoutFramesLeft);
        snd_pcm_sframes_t sndFrames =
            snd_pcm_writei(data->playback_handle,
                           &data->m_pPlayoutBuffer[data->m_nPlayoutBufferSizeIn20MS - size],
                           sndAvailFrames);
        if (sndFrames < 0)
        {
            printf("playout snd_pcm_writei error: %s\n", snd_strerror(sndFrames));
            data->m_nPlayoutFramesLeft = 0;
            errorRecovery(sndFrames, data->playback_handle);
            continue;
        }
        else
        {
            // Dump exactly the mono reference samples that were just played out.
            fwrite(&data->m_pRefBuffer[PERIOD_SIZE * 2 - data->m_nPlayoutFramesLeft * 2],
                   1, sndFrames * 2, data->pcm_ref_file);
            data->m_nPlayoutFramesLeft -= sndFrames;
        }
    }
    return NULL;
}

// Capture thread: accumulates 10 ms stereo blocks from ALSA, downmixes to mono,
// dumps the raw near-end signal, then runs WebRTC 3A (reverse stream = far-end
// reference, forward stream = near-end capture) and dumps the processed result.
void *capture_thread(void *arg)
{
    printf("capture_thread\n");
    audio_data_t *data = (audio_data_t *)arg;

    // Raise thread to near-top SCHED_FIFO priority to minimize capture jitter.
    const int policy = SCHED_FIFO;
    const int max_prio = sched_get_priority_max(policy);
    sched_param param;
    param.sched_priority = max_prio - 1;
    pthread_setschedparam(pthread_self(), policy, &param);

    while (gRun)
    {
        int8_t buffer[data->m_nRecordBufferSizeIn20MS]; // scratch for one readi

        snd_pcm_sframes_t sndAvailFrames = snd_pcm_avail_update(data->capture_handle);
        if (sndAvailFrames < 0)
        {
            printf("capture snd_pcm_avail_update error: %s\n", snd_strerror(sndAvailFrames));
            errorRecovery(sndAvailFrames, data->capture_handle);
            continue;
        }
        else if (sndAvailFrames == 0)
        {
            continue;
        }

        // Never read more than is still missing from the current 10 ms block.
        if ((uint32_t)sndAvailFrames > (uint32_t)data->m_nRecordingFramesLeft)
            sndAvailFrames = data->m_nRecordingFramesLeft;

        snd_pcm_sframes_t sndFrames =
            snd_pcm_readi(data->capture_handle, buffer, sndAvailFrames);
        if (sndFrames < 0)
        {
            printf("capture snd_pcm_readi error: %s\n", snd_strerror(sndFrames));
            errorRecovery(sndFrames, data->capture_handle);
            continue;
        }
        else if (sndFrames > 0)
        {
            // Append the new frames at the current fill position of the block.
            int nLeftSize = snd_pcm_frames_to_bytes(data->capture_handle,
                                                    data->m_nRecordingFramesLeft);
            int size = snd_pcm_frames_to_bytes(data->capture_handle, sndFrames);
            memcpy(&data->m_pRecordBuffer[data->m_nRecordBufferSizeIn20MS - nLeftSize],
                   buffer, size);
            data->m_nRecordingFramesLeft -= sndFrames;
        }

        if (!data->m_nRecordingFramesLeft)
        {
            // A full 10 ms block is ready: downmix and run the 3A pipeline.
            data->m_nRecordingFramesLeft = PERIOD_SIZE;
            stereoTurnMono((unsigned char *)data->m_pRecordBuffer,
                           (unsigned char *)data->m_pNearBuffer, PERIOD_SIZE * 2 * 2);
            fwrite(data->m_pNearBuffer, 1, PERIOD_SIZE * 2, data->pcm_out_file);

            webrtc::StreamConfig stream_config(sample_rate_hz, num_channels);
            // NOTE(review): m_pRefBuffer is concurrently refilled by the playback
            // thread with no locking -- a data race that can degrade AEC quality;
            // consider handing the reference frame over through a queue.
            if (data->apm->ProcessReverseStream((int16_t *)data->m_pRefBuffer,
                                                stream_config, stream_config,
                                                (int16_t *)data->m_pRefBuffer) !=
                webrtc::AudioProcessing::kNoError)
            {
                printf("ProcessReverseStream fail\n");
            }
            if (data->apm->ProcessStream((int16_t *)data->m_pNearBuffer,
                                         stream_config, stream_config,
                                         (int16_t *)data->m_p3ABuffer) !=
                webrtc::AudioProcessing::kNoError)
            {
                printf("ProcessStream fail\n");
            }
            fwrite(data->m_p3ABuffer, 1, PERIOD_SIZE * 2, data->pcm_3a_file);
        }
    }
    return NULL;
}

// Open and configure an ALSA PCM device: non-blocking, interleaved S16LE,
// stereo, 16 kHz, 160-frame periods with a 4-period buffer.
// Returns 0 on success, a negative ALSA error code otherwise.
int setup_pcm_device(snd_pcm_t **handle, const char *device, snd_pcm_stream_t stream)
{
    snd_pcm_hw_params_t *params;
    int err;
    if ((err = snd_pcm_open(handle, device, stream, SND_PCM_NONBLOCK)) < 0)
    {
        printf("无法打开 PCM 设备 %s: %s\n", device, snd_strerror(err));
        return err;
    }
    // (fixed mojibake: original had `¶ms` where `&params` belongs)
    snd_pcm_hw_params_alloca(&params);
    snd_pcm_hw_params_any(*handle, params);
    snd_pcm_hw_params_set_access(*handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
    snd_pcm_hw_params_set_format(*handle, params, SND_PCM_FORMAT_S16_LE);
    snd_pcm_hw_params_set_channels(*handle, params, CHANNELS);
    snd_pcm_hw_params_set_rate(*handle, params, SAMPLE_RATE, 0);
    snd_pcm_hw_params_set_period_size(*handle, params, PERIOD_SIZE, 0);
    snd_pcm_hw_params_set_buffer_size(*handle, params, PERIOD_SIZE * 4);
    if ((err = snd_pcm_hw_params(*handle, params)) < 0)
    {
        printf("设置 PCM 参数失败: %s\n", snd_strerror(err));
        snd_pcm_close(*handle);
        return err;
    }
    return 0;
}

// Entry point: opens the PCM dump files, configures the WebRTC 3A module,
// sets up playback/capture devices, starts both worker threads, and runs
// until a key is pressed. Returns 0 on success, -1 on any setup failure.
int main(int argc, char *argv[])
{
    audio_data_t data;
    if ((data.pcm_file = fopen("./far.pcm", "rb")) == NULL ||
        (data.pcm_3a_file = fopen("./3a.pcm", "wb")) == NULL ||
        (data.pcm_out_file = fopen("./near.pcm", "wb")) == NULL ||
        (data.pcm_ref_file = fopen("./ref.pcm", "wb")) == NULL)
    {
        printf("fail to open file\n");
        return -1;
    }

    // Build and configure the WebRTC audio-processing module (3A).
    data.apm = webrtc::AudioProcessingBuilder().Create();
    webrtc::AudioProcessing::Config config;
    // Noise suppression
    config.noise_suppression.enabled = true;
    config.noise_suppression.level =
        webrtc::AudioProcessing::Config::NoiseSuppression::Level::kHigh;
    // Automatic gain control
    config.gain_controller1.enabled = true;
    config.gain_controller1.mode =
        webrtc::AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital;
    config.gain_controller1.target_level_dbfs = 3; // target output level
    // Echo cancellation
    config.echo_canceller.enabled = true;
    config.echo_canceller.mobile_mode = false; // set true on mobile-class hardware
    // Voice activity detection (optional)
    config.voice_detection.enabled = true;
    data.apm->ApplyConfig(config);

    if (setup_pcm_device(&data.playback_handle, "hw:0,0", SND_PCM_STREAM_PLAYBACK) < 0)
    {
        fclose(data.pcm_file);
        return -1;
    }
    if (setup_pcm_device(&data.capture_handle, "hw:0,0", SND_PCM_STREAM_CAPTURE) < 0)
    {
        snd_pcm_close(data.playback_handle);
        fclose(data.pcm_file);
        return -1;
    }

    pthread_t play_thread, record_thread;
    pthread_create(&play_thread, NULL, playback_thread, &data);
    int nRet = snd_pcm_prepare(data.playback_handle);
    if (nRet < 0)
    {
        printf("playout snd_pcm_prepare failed (%s)\n", snd_strerror(nRet));
    }
    pthread_create(&record_thread, NULL, capture_thread, &data);
    nRet = snd_pcm_prepare(data.capture_handle);
    if (nRet < 0)
    {
        printf("capture snd_pcm_prepare failed:%s \n", snd_strerror(nRet));
    }
    nRet = snd_pcm_start(data.capture_handle);
    if (nRet < 0)
    {
        printf("capture snd_pcm_start err:%s\n", snd_strerror(nRet));
        nRet = snd_pcm_start(data.capture_handle);
        if (nRet < 0)
        {
            printf("capture snd_pcm_start 2nd try err:%s\n", snd_strerror(nRet));
            // Fixed: original `return false` exited with status 0 (success)
            // on a fatal failure.
            return -1;
        }
    }

    getchar(); // run until a key is pressed
    gRun = false;
    pthread_join(play_thread, NULL);
    pthread_join(record_thread, NULL);

    snd_pcm_close(data.playback_handle);
    snd_pcm_close(data.capture_handle);
    fclose(data.pcm_file);
    fclose(data.pcm_out_file);
    fclose(data.pcm_3a_file);
    fclose(data.pcm_ref_file);
    printf("end................... \n");
    return 0;
}
2.注意项
- webrtc audio processing是基于10ms为一帧进行处理的
- 当前版本中可以设置3A配置等级,具体3A参数调参请参考我另一篇文章音频3A一——webrtc源码3A的启用方法和具体流程
- 对于资源消耗,如果没有对资源特别要求,或者其他特殊情况,尽量不要追求类似于WebRtcAec_Process,WebRtcAgc_Process这种方式单独使用3A的某一个模块,而是通过audio_processing进行处理
- 对于时延,如果有固定时延,应该对于AEC进行设置
3.效果展示
远端参考信号
近端采集信号
回声消除后的信号
总结
webrtc不愧是音视频领域的顶尖,值得我们学习的东西太多了。实际上demo里对于设备的读写,也是从webrtc中摘录出来的。
如果对您有所帮助,请帮忙点个赞吧!