为什么要重采样
需要重采样的常见原因有三个:从设备采集的音频数据与编码器要求的数据不一致;扬声器要求的音频数据与要播放的音频数据不一致;更方便运算(例如回音消除须使用单声道,需要先转换)。
比如说语音识别,只需要很低的采样率就可以了,采样率高了只会增加数据量,毫无用处,这时候就需要进行音频重采样。重采样可以改变音频的采样率或采样格式。
完整代码
代码参考了FFmpeg示例,利用 fill_samples() 函数生成正弦波音频数据,然后实现将采样率从48000 Hz转换成44100 Hz的功能。代码一开始做了最基本的初始化,然后分配空间,计算输出采样点数,最后进行采样率的转换,并将转换后的数据写入本地文件。代码中的关键函数是 swr_convert(),采样率的转换就是靠它完成的。以下代码在Qt5.14.0中验证OK,代码如下:
#ifdef __cplusplus
extern "C"
{
#endif
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
#ifdef __cplusplus
}
#endif
/* Sample rate of the generated input signal; passed to fill_samples() and
 * set as the resampler's "in_sample_rate". */
#define IN_RATE 48000
/* Sample rate of the resampled output; set as the resampler's
 * "out_sample_rate" and printed in the ffplay hint. */
#define OUT_RATE 44100
/*
 * Look up the raw-PCM format name (as accepted by `ffplay -f`) that
 * corresponds to a packed sample format, picking the big- or little-endian
 * spelling via AV_NE.
 *
 * fmt:        receives a pointer to a static string on success; it is left
 *             untouched on failure.
 * sample_fmt: one of AV_SAMPLE_FMT_U8/S16/S32/FLT/DBL.
 *
 * Returns 0 on success, AVERROR(EINVAL) for any other sample format.
 */
static int get_format_from_sample_fmt(const char **fmt,
                                      enum AVSampleFormat sample_fmt)
{
    /* const-qualified: these only ever point at string literals, and a
     * non-const char * initialised from a literal is rejected by C++11. */
    const char *little_end = NULL;
    const char *big_end = NULL;

    switch ((int)sample_fmt) {
    case AV_SAMPLE_FMT_U8:  big_end = "u8";    little_end = "u8";    break;
    case AV_SAMPLE_FMT_S16: big_end = "s16be"; little_end = "s16le"; break;
    case AV_SAMPLE_FMT_S32: big_end = "s32be"; little_end = "s32le"; break;
    case AV_SAMPLE_FMT_FLT: big_end = "f32be"; little_end = "f32le"; break;
    case AV_SAMPLE_FMT_DBL: big_end = "f64be"; little_end = "f64le"; break;
    default:
        return AVERROR(EINVAL);
    }

    *fmt = AV_NE(big_end, little_end);
    return 0;
}
static void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
{
int i, j;
double tincr = 1.0 / sample_rate, *dstp = dst;
const double c = 2 * M_PI * 440.0;
for (i = 0; i < nb_samples; i++) {
*dstp = sin(c * *t);
for (j = 1; j < nb_channels; j++)
dstp[j] = dstp[0];
dstp += nb_channels;
*t += tincr;
}
}
/*
 * Generate a stereo sine tone at IN_RATE in AV_SAMPLE_FMT_DBL, resample it
 * to OUT_RATE / AV_SAMPLE_FMT_S16 with libswresample, and write the raw PCM
 * to "out.pcm". Input is produced in chunks of 1024 frames until the
 * generator's time counter reaches 10.
 *
 * Returns 0 on success and 1 if any step (open, init, allocation,
 * conversion, write, close) fails.
 */
int main()
{
    /* All locals are declared up front so that no `goto end` jumps over an
     * initialisation (this file is also built as C++). */
    struct SwrContext *swr_ctx = swr_alloc();
    enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL;
    enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16;
    uint8_t **src_data = NULL;
    uint8_t **dst_data = NULL;
    int src_nb_channels = av_get_channel_layout_nb_channels(AV_CH_LAYOUT_STEREO);
    int dst_nb_channels = av_get_channel_layout_nb_channels(AV_CH_LAYOUT_STEREO);
    int src_linesize;
    int dst_linesize;
    int src_nb_samples = 1024;
    int dst_nb_samples;
    int max_dst_nb_samples;
    int dst_bufsize;
    const char *dst_filename = "out.pcm";
    FILE *dst_file = fopen(dst_filename, "wb");
    const char *fmt;
    double t = 0.0;
    int ret = 0;
    int status = 1; /* assume failure until every step has succeeded */

    if (!dst_file) {
        fprintf(stderr, "Could not open destination file %s\n", dst_filename);
        goto end;
    }
    if (!swr_ctx) {
        fprintf(stderr, "Could not allocate resampler context\n");
        goto end;
    }

    /* Configure the resampler: same stereo layout on both sides, only the
     * sample rate and sample format differ. */
    av_opt_set_int(swr_ctx, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
    av_opt_set_int(swr_ctx, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
    av_opt_set_int(swr_ctx, "in_sample_rate", IN_RATE, 0);
    av_opt_set_int(swr_ctx, "out_sample_rate", OUT_RATE, 0);
    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);
    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);
    if (swr_init(swr_ctx) < 0) {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        goto end;
    }

    ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
                                             src_nb_samples, src_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate source samples\n");
        goto end;
    }

    /* Initial output capacity: the input chunk size rescaled to the output
     * rate, rounded up. It is grown inside the loop when needed. */
    max_dst_nb_samples = dst_nb_samples =
        av_rescale_rnd(src_nb_samples, OUT_RATE, IN_RATE, AV_ROUND_UP);
    ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
                                             dst_nb_samples, dst_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate destination samples\n");
        goto end;
    }

    do {
        fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels, IN_RATE, &t);

        /* Account for samples still buffered inside the resampler when
         * sizing the output for this chunk. */
        int64_t delay = swr_get_delay(swr_ctx, IN_RATE);
        dst_nb_samples = av_rescale_rnd(delay + src_nb_samples, OUT_RATE, IN_RATE, AV_ROUND_UP);
        if (dst_nb_samples > max_dst_nb_samples) {
            av_freep(&dst_data[0]);
            ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
                                   dst_nb_samples, dst_sample_fmt, 1);
            if (ret < 0) {
                /* dst_data[0] is gone; do not fall through to the flush. */
                fprintf(stderr, "Could not reallocate destination samples\n");
                goto end;
            }
            max_dst_nb_samples = dst_nb_samples;
        }

        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples,
                          (const uint8_t **)src_data, src_nb_samples);
        if (ret < 0) {
            fprintf(stderr, "Error while converting\n");
            goto end;
        }
        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
                                                 ret, dst_sample_fmt, 1);
        if (dst_bufsize < 0) {
            fprintf(stderr, "Could not get sample buffer size\n");
            goto end;
        }
        printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);
        if (fwrite(dst_data[0], 1, dst_bufsize, dst_file) != (size_t)dst_bufsize) {
            fprintf(stderr, "Short write to %s\n", dst_filename);
            goto end;
        }
    } while (t < 10);

    /* Drain whatever the resampler still holds. The returned sample count
     * must be captured here: it, not the last loop iteration's count,
     * determines how many bytes are valid in dst_data[0]. */
    while ((ret = swr_convert(swr_ctx, dst_data, max_dst_nb_samples, NULL, 0)) > 0) {
        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
                                                 ret, dst_sample_fmt, 1);
        if (dst_bufsize < 0) {
            fprintf(stderr, "Could not get sample buffer size\n");
            goto end;
        }
        printf("flush in:%d out:%d\n", 0, ret);
        if (fwrite(dst_data[0], 1, dst_bufsize, dst_file) != (size_t)dst_bufsize) {
            fprintf(stderr, "Short write to %s\n", dst_filename);
            goto end;
        }
    }
    if (ret < 0) {
        fprintf(stderr, "Error while flushing the resampler\n");
        goto end;
    }

    if (get_format_from_sample_fmt(&fmt, dst_sample_fmt) < 0) { goto end; }
    /* Spaces around PRId64 keep the literal valid when compiled as C++11. */
    fprintf(stderr, "ffplay -f %s -channel_layout %" PRId64 " -channels %d -ar %d %s\n",
            fmt, (int64_t)AV_CH_LAYOUT_STEREO, dst_nb_channels, OUT_RATE, dst_filename);

    status = 0;

end:
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (dst_file && fclose(dst_file) != 0) {
        fprintf(stderr, "Error closing %s\n", dst_filename);
        status = 1;
    }
    if (src_data)
        av_freep(&src_data[0]);
    av_freep(&src_data);
    if (dst_data)
        av_freep(&dst_data[0]);
    av_freep(&dst_data);
    swr_free(&swr_ctx);
    return status;
}
运行结果
编译运行后会在输出目录看到一个out.pcm文件,使用ffplay -f s16le -channel_layout 3 -channels 2 -ar 44100 out.pcm命令即可播放,声音为一个连续的单音调。
|