Android放音的采樣率固定為44.1KHz,錄音的采樣率固定為8KHz,因此底層的音頻設備驅動需要設置好這兩個固定的采樣率。如果上層傳過來的采樣率不符的話,需要進行resample重采樣處理。
幾個名詞:
1. 采樣率
采樣設備每秒抽取樣本的次數
2. 音頻格式及量化精度(位寬)
每種音頻格式有不同的量化精度(位寬),位數越多,表示值就越精確,聲音表現自然就越精準。FFmpeg中音頻格式有以下幾種,每種格式有其占用的字節數信息:
/**
 * Audio sample formats.
 *
 * Each enumerator is written on its own line so that the trailing `///<`
 * line comments cannot swallow the enumerators that follow them.
 * Enumerators without the trailing "P" are packed (interleaved) formats;
 * those with the trailing "P" are planar (one plane per channel).
 */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double

    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar
    AV_SAMPLE_FMT_S64,         ///< signed 64 bits
    AV_SAMPLE_FMT_S64P,        ///< signed 64 bits, planar

    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};
3. 分片(planar)和打包(packed)
以雙聲道為例,帶P(planar)的數據格式在存儲時,其左聲道和右聲道的數據是分開存儲的,左聲道的數據存儲在data[0],右聲道的數據存儲在data[1];對於音頻,FFmpeg只設置linesize[0],且各聲道平面的大小相同,因此每個聲道所占用的字節數均為linesize[0];
不帶P(packed)的音頻數據在存儲時,是按照LRLRLR...的格式交替存儲在data[0]中,linesize[0]表示總的數據量。
4. 聲道分布(channel_layout)
聲道分布在FFmpeg\libavutil\channel_layout.h中有定義,一般來說用的比較多的是AV_CH_LAYOUT_STEREO(雙聲道)和AV_CH_LAYOUT_SURROUND(三聲道),這兩者的定義如下:
/* Two-channel layout: the front-left and front-right channel bits OR-ed together. */
#define AV_CH_LAYOUT_STEREO (AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT)
/* Three-channel layout: the stereo mask plus the front-center channel bit. */
#define AV_CH_LAYOUT_SURROUND (AV_CH_LAYOUT_STEREO|AV_CH_FRONT_CENTER)
5. 音頻幀的數據量計算
一幀音頻的數據量=channel數 * nb_samples樣本數 * 每個樣本占用的字節數
如果該音頻幀是FLTP格式的PCM數據(每個樣本為4字節的float),包含1024個樣本,雙聲道,那麼該音頻幀包含的音頻數據量是2*1024*4=8192字節。
6. 音頻播放時間計算
以采樣率44100Hz來計算,每秒44100個sample,而正常一幀為1024個sample,因此每幀播放時間=1024/44100秒=1024*1000/44100 ms≈23.2ms。
7. 音頻重采樣(resample)
FFmpeg自帶的resample例子:FFmpeg\doc\examples\resampling_audio.c,這裡把最核心的resample代碼貼一下,在工程中使用時,注意設置的各種參數和給定的輸入數據都不能錯。
int main(int argc, char **argv) {
// 設置數據源src和dst聲道布局 int64_t src_ch_layout = AV_CH_LAYOUT_STEREO, dst_ch_layout = AV_CH_LAYOUT_SURROUND;
// 設置src和dst采樣率 int src_rate = 48000, dst_rate = 44100; uint8_t **src_data = NULL, **dst_data = NULL; int src_nb_channels = 0, dst_nb_channels = 0; int src_linesize, dst_linesize; int src_nb_samples = 1024, dst_nb_samples, max_dst_nb_samples;
// 設置src和dst音頻格式 enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL, dst_sample_fmt = AV_SAMPLE_FMT_S16; const char *dst_filename = NULL; FILE *dst_file; int dst_bufsize; const char *fmt;
// 重采樣上下文,包含resample信息 struct SwrContext *swr_ctx; double t; int ret; if (argc != 2) { fprintf(stderr, "Usage: %s output_file\n" "API example program to show how to resample an audio stream with libswresample.\n" "This program generates a series of audio frames, resamples them to a specified " "output format and rate and saves them to an output file named output_file.\n", argv[0]); exit(1); }
// resample后的數據保存到本地文件 dst_filename = argv[1]; dst_file = fopen(dst_filename, "wb"); if (!dst_file) { fprintf(stderr, "Could not open destination file %s\n", dst_filename); exit(1); } /* create resampler context */ swr_ctx = swr_alloc(); if (!swr_ctx) { fprintf(stderr, "Could not allocate resampler context\n"); ret = AVERROR(ENOMEM); goto end; } /* set options */ // 將resample信息寫入resample上下文
av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0); av_opt_set_int(swr_ctx, "in_sample_rate", src_rate, 0); av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0); av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0); av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0); av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0); /* initialize the resampling context */ if ((ret = swr_init(swr_ctx)) < 0) { fprintf(stderr, "Failed to initialize the resampling context\n"); goto end; } /* allocate source and destination samples buffers */ src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout); ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels, src_nb_samples, src_sample_fmt, 0); if (ret < 0) { fprintf(stderr, "Could not allocate source samples\n"); goto end; } /* compute the number of converted samples: buffering is avoided * ensuring that the output buffer will contain at least all the * converted input samples */ max_dst_nb_samples = dst_nb_samples = av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP); /* buffer is going to be directly written to a rawaudio file, no alignment */ dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout); ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels, dst_nb_samples, dst_sample_fmt, 0); if (ret < 0) { fprintf(stderr, "Could not allocate destination samples\n"); goto end; } t = 0; do { /* generate synthetic audio */
// 這里是自行生成源數據幀,實際工程中應該將解碼后的PCM數據填入src_data中 fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t); /* compute destination number of samples */ dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) + src_nb_samples, dst_rate, src_rate, AV_ROUND_UP); if (dst_nb_samples > max_dst_nb_samples) { av_freep(&dst_data[0]); ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels, dst_nb_samples, dst_sample_fmt, 1); if (ret < 0) break; max_dst_nb_samples = dst_nb_samples; } /* convert to destination format */ // 重采樣操作
ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples); if (ret < 0) { fprintf(stderr, "Error while converting\n"); goto end; } dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels, ret, dst_sample_fmt, 1); if (dst_bufsize < 0) { fprintf(stderr, "Could not get sample buffer size\n"); goto end; } printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret); fwrite(dst_data[0], 1, dst_bufsize, dst_file); } while (t < 10); if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0) goto end; fprintf(stderr, "Resampling succeeded. Play the output file with the command:\n" "ffplay -f %s -channel_layout %"PRId64" -channels %d -ar %d %s\n", fmt, dst_ch_layout, dst_nb_channels, dst_rate, dst_filename); end: fclose(dst_file); if (src_data) av_freep(&src_data[0]); av_freep(&src_data); if (dst_data) av_freep(&dst_data[0]); av_freep(&dst_data); swr_free(&swr_ctx); return ret < 0; }