上一篇文章我们学习了如何通过FFmpeg解码视频帧,本篇文章我们来学习如何解码音频帧。文章先分段讲解音频解码的各个步骤,接着贴上完整代码,最后进行测试。
准备工作
在开始学习前,我们先准备一个文件夹(Res),在里面放置一个视频文件(video.mp4)和一个音频文件(audio.mp3),时长为1分钟左右即可。 当然,对于没有学过 FFprobe 和 FFplay 的朋友,强烈建议先学习一下它们的常用命令。这两个工具可以用来测试我们的音视频文件。
音频解码的步骤如下。 1.打开文件获取多媒体文件上下文 2.获取音频流 3.打开音频解码器 4.循环读取封装帧 4.1.解码音频帧 4.2.输出pcm原始数据
获取多媒体文件上下文
...
// Open the input file; avformat_open_input fills avFormatContext.
AVFormatContext *avFormatContext = NULL;
int ret = avformat_open_input(&avFormatContext, inputFilePath, NULL, NULL);
if (ret < 0){
// Turn the negative FFmpeg error code into readable text before reporting it.
char buff[1024];
av_strerror(ret, buff, sizeof(buff)-1);
cout << "can't open file" << endl;
cout << buff << endl;
avformat_close_input(&avFormatContext);
return -1;
}
...
获取音频流
获取音频流的方式和获取视频流的方式是一样的,都是通过下标来获取的。
...
// Ask FFmpeg for the audio stream index; a negative value means the lookup failed.
int audioIndex = av_find_best_stream(avFormatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
if (audioIndex < 0){
// Only the media type name is printed here before closing the input and bailing out.
cout << av_get_media_type_string(AVMEDIA_TYPE_AUDIO) << endl;
avformat_close_input(&avFormatContext);
return -1;
}
// The index selects the stream object inside the format context.
AVStream *audioStream = avFormatContext->streams[audioIndex];
...
打开音频解码器
static int openAudioCodec(int audioStreamIndex,
AVCodecContext **avCodecContext, AVFormatContext *avFormatContext,char *outputFilePath){
AVStream *avStream;
AVCodec *avCodec = NULL;
AVDictionary *opts = NULL;
if (av_find_best_stream(avFormatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0) < 0) {
cout << "can't find audio stream" << endl;
return -1;
}
else {
avStream = avFormatContext->streams[audioStreamIndex];
avCodec = avcodec_find_decoder(avStream->codecpar->codec_id);
if (avCodec == NULL) {
cout << "can't find audio codec" << endl;
return -1;
}
*avCodecContext = avcodec_alloc_context3(avCodec);
if (*avCodecContext == NULL) {
cout << "can't alloc audio codec context" << endl;
return -1;
}
if (avcodec_parameters_to_context(*avCodecContext, avStream->codecpar) < 0) {
cout << "can't copy input stream params" << endl;
return -1;
}
if (avcodec_open2(*avCodecContext, avCodec, &opts) < 0) {
cout << "can't open audio codec" << endl;
return -1;
}
}
return 0;
}
循环获取封装帧
程序通过循环的方式读取每一个封装帧,一边读取一边解码。
...
// One frame object is allocated up front and handed to every decode call.
AVFrame *avFrame = av_frame_alloc();
while (1){
// Read the next packet; any negative return ends the loop.
ret = av_read_frame(avFormatContext, avPacket);
if (ret < 0){
cout << "finished" << endl;
break;
}
// Only packets that belong to the selected audio stream are decoded.
if (avPacket->stream_index == audioIndex){
cout << "=============packet=============" << endl;
cout << "pos:" << avPacket->pos << endl;
cout << "handle audio packet" << endl;
decodeAudioPacket(avCodecContext, avPacket, avFrame,outputFile);
cout << "=============packet=============" << endl;
}
// Unreference the packet on every iteration, whether or not it was decoded.
av_packet_unref(avPacket);
}
...
解码音频帧
// Submit one packet to the decoder and print the properties of every frame it yields.
// Returns 0 when the decoder reports EAGAIN or EOF, a negative value on any other failure.
static int decodeAudioPacket(AVCodecContext *avCodecContext, const AVPacket *avPacket,AVFrame *avFrame,FILE *outputFile)
{
int ret = 0;
if (avcodec_send_packet(avCodecContext, avPacket) < 0) {
cout << "error on submit packet" << endl;
return -1;
}
// Keep receiving: the loop only exits through the returns inside it.
while (ret >= 0) {
ret = avcodec_receive_frame(avCodecContext, avFrame);
if (ret < 0) {
// EAGAIN and EOF are treated as a normal stop, not as an error.
if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))
return 0;
cout << "error while decodeing" << endl;
return ret;
}
// Bytes occupied by a single sample of the frame's sample format.
int sampleSize = av_get_bytes_per_sample((AVSampleFormat)avFrame->format);
cout << "--------------frame--------------" << endl;
cout << "sample rate:" << avFrame->sample_rate << endl;
cout << "channel num:" << avFrame->channels << endl;
cout << "format:" << avFrame->format << endl;
cout << "sample nums:" << avFrame->nb_samples << endl;
cout << "sampleSize:" << sampleSize << endl;
cout << "--------------frame--------------" << endl;
...
av_frame_unref(avFrame);
// NOTE(review): ret is always >= 0 at this point, so this check never fires.
if (ret < 0)
return ret;
}
return 0;
}
输出pcm原始数据
解码音频帧后我们就获取到音频的原始数据了,这里把它输出到文件中去。
PCM数据存放方式如下。对于单声道,每一帧采样数据按序放置即可。对于双声道,每个声道的每一帧采样数据交替放置。
...
for (int i = 0; i < avFrame->nb_samples;i++){
fwrite(avFrame->extended_data[0] + sampleSize * i, 1, sampleSize, outputFile);
fwrite(avFrame->extended_data[1] + sampleSize * i, 1, sampleSize, outputFile);
}
...
完整代码
完整代码如下。
#include "stdafx.h"
#include <iostream>
extern "C"
{
#include "libavformat/avformat.h"
};
using namespace std;
/**
 * Decode one audio packet and append the decoded samples to outputFile as
 * interleaved raw PCM.
 *
 * @param avCodecContext opened audio decoder context
 * @param avPacket       packet to decode; NULL puts the decoder into draining mode
 * @param avFrame        caller-owned scratch frame, unreferenced after each decoded frame
 * @param outputFile     destination stream opened for binary writing
 * @return 0 when the decoder wants more input or is drained, negative on failure
 */
static int decodeAudioPacket(AVCodecContext *avCodecContext, const AVPacket *avPacket,AVFrame *avFrame,FILE *outputFile)
{
    if (avcodec_send_packet(avCodecContext, avPacket) < 0) {
        cout << "error on submit packet" << endl;
        return -1;
    }

    // One packet may yield zero or more frames; keep receiving until the decoder stops.
    for (;;) {
        int ret = avcodec_receive_frame(avCodecContext, avFrame);
        if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))
            return 0;
        if (ret < 0) {
            cout << "error while decodeing" << endl;
            return ret;
        }

        const AVSampleFormat sampleFormat = (AVSampleFormat)avFrame->format;
        const int sampleSize = av_get_bytes_per_sample(sampleFormat);
        const int channelCount = avFrame->channels;
        cout << "--------------frame--------------" << endl;
        cout << "sample rate:" << avFrame->sample_rate << endl;
        cout << "channel num:" << channelCount << endl;
        cout << "format:" << avFrame->format << endl;
        cout << "sample nums:" << avFrame->nb_samples << endl;
        cout << "sampleSize:" << sampleSize << endl;
        cout << "--------------frame--------------" << endl;

        bool writeFailed = false;
        if (sampleSize > 0 && channelCount > 0 && avFrame->nb_samples > 0) {
            if (av_sample_fmt_is_planar(sampleFormat)) {
                // Planar: one plane per channel, so interleave sample by sample.
                for (int i = 0; i < avFrame->nb_samples && !writeFailed; i++) {
                    for (int ch = 0; ch < channelCount; ch++) {
                        const uint8_t *sample = avFrame->extended_data[ch] + (size_t)sampleSize * i;
                        if (fwrite(sample, 1, (size_t)sampleSize, outputFile) != (size_t)sampleSize) {
                            writeFailed = true;
                            break;
                        }
                    }
                }
            }
            else {
                // Packed: plane 0 already contains the interleaved samples of all channels.
                const size_t bytesPerSampleGroup = (size_t)sampleSize * channelCount;
                const size_t written = fwrite(avFrame->extended_data[0], bytesPerSampleGroup,
                    (size_t)avFrame->nb_samples, outputFile);
                writeFailed = (written != (size_t)avFrame->nb_samples);
            }
        }

        // Release the frame's buffers before reporting a write failure.
        av_frame_unref(avFrame);
        if (writeFailed) {
            cout << "error on write pcm data" << endl;
            return -1;
        }
    }
}
static int openAudioCodec(int audioStreamIndex,
AVCodecContext **avCodecContext, AVFormatContext *avFormatContext,char *outputFilePath){
AVStream *avStream;
AVCodec *avCodec = NULL;
AVDictionary *opts = NULL;
if (av_find_best_stream(avFormatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0) < 0) {
cout << "can't find audio stream" << endl;
return -1;
}
else {
avStream = avFormatContext->streams[audioStreamIndex];
avCodec = avcodec_find_decoder(avStream->codecpar->codec_id);
if (avCodec == NULL) {
cout << "can't find audio codec" << endl;
return -1;
}
*avCodecContext = avcodec_alloc_context3(avCodec);
if (*avCodecContext == NULL) {
cout << "can't alloc audio codec context" << endl;
return -1;
}
if (avcodec_parameters_to_context(*avCodecContext, avStream->codecpar) < 0) {
cout << "can't copy input stream params" << endl;
return -1;
}
if (avcodec_open2(*avCodecContext, avCodec, &opts) < 0) {
cout << "can't open audio codec" << endl;
return -1;
}
}
return 0;
}
int _tmain(int argc, _TCHAR* argv[])
{
char inputFilePath[100];
cout << "inputfile path: ";
cin >> inputFilePath;
char outputFilePath[100];
cout << "outputfile path:";
cin >> outputFilePath;
FILE *outputFile = NULL;
fopen_s(&outputFile, outputFilePath, "wb");
if (outputFile == NULL){
cout << "can't open output file" << endl;
return -1;
}
AVFormatContext *avFormatContext = NULL;
int ret = avformat_open_input(&avFormatContext, inputFilePath, NULL, NULL);
if (ret < 0){
char buff[1024];
av_strerror(ret, buff, sizeof(buff)-1);
cout << "can't open file" << endl;
cout << buff << endl;
avformat_close_input(&avFormatContext);
return -1;
}
ret = avformat_find_stream_info(avFormatContext, NULL);
if (ret < 0){
char buff[1024];
av_strerror(ret, buff, sizeof(buff)-1);
cout << "can't open stream" << endl;
cout << buff << endl;
avformat_close_input(&avFormatContext);
return -1;
}
av_dump_format(avFormatContext, 0, inputFilePath, 0);
cout << "stream num:" << avFormatContext->nb_streams << endl;
cout << "duration:" << avFormatContext->duration << endl << endl;
int audioIndex = av_find_best_stream(avFormatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
if (audioIndex < 0){
cout << av_get_media_type_string(AVMEDIA_TYPE_AUDIO) << endl;
avformat_close_input(&avFormatContext);
return -1;
}
AVStream *audioStream = avFormatContext->streams[audioIndex];
cout << "duration:" << audioStream->duration << endl;
AVPacket *avPacket = av_packet_alloc();
cout << "start read frames" << endl;
AVCodecContext *avCodecContext;
if (openAudioCodec(audioIndex, &avCodecContext, avFormatContext,outputFilePath) < 0){
return -1;
}
AVFrame *avFrame = av_frame_alloc();
while (1){
ret = av_read_frame(avFormatContext, avPacket);
if (ret < 0){
cout << "finished" << endl;
break;
}
if (avPacket->stream_index == audioIndex){
cout << "=============packet=============" << endl;
cout << "pos:" << avPacket->pos << endl;
cout << "handle audio packet" << endl;
decodeAudioPacket(avCodecContext, avPacket, avFrame,outputFile);
cout << "=============packet=============" << endl;
}
av_packet_unref(avPacket);
}
cout << "decode finish" << endl;
avformat_close_input(&avFormatContext);
av_free(avPacket);
av_free(avFrame);
fclose(outputFile);
return 0;
}
测试
运行程序,输入刚刚Res文件夹里的音频文件路径(audio.mp3)和输出文件路径。程序输出pcm文件。 通过FFprobe查看audio.mp3的相关信息。
ffprobe -i audio.mp3
如下图,这个音频文件的声道数是2,采样率是44100,采样格式是浮点型(32位)。 接着我们通过ffplay来播放输出的pcm文件。-ar 表示采样率,-ac 表示声道数,-f 表示采样格式,f32le是指32位浮点小端模式。如果播放时声音很怪,可能是采样率、声道数或采样格式有问题。
ffplay -ar 44100 -ac 2 -f f32le -i out.pcm
最后
本篇文章讲述了如何通过FFmpeg实现音频解码。
感兴趣的朋友还可以到我的Gitee仓库看看完整工程代码。
参考文章
《FFmpeg的音频处理详解》
|