FFMpeg筆記（三）音頻處理基本概念及音頻重采樣

本文轉載自查看原文 2018-04-05 22:46 9008 FFmpeg/ audio/ resample

Android放音的采樣率固定為44.1KHz，錄音的采樣率固定為8KHz，因此底層的音頻設備驅動需要設置好這兩個固定的采樣率。如果上層傳過來的采樣率不符的話，需要進行resample重采樣處理。

幾個名詞：

1. 采樣率

采樣設備每秒抽取樣本的次數

2. 音頻格式及量化精度（位寬）

每種音頻格式有不同的量化精度（位寬），位數越多，表示值就越精確，聲音表現自然就越精准。FFMpeg中音頻格式有以下幾種，每種格式有其占用的字節數信息：

/**
 * Audio sample formats.
 *
 * Formats without a trailing "P" are packed (all channels interleaved in a
 * single buffer); formats with a trailing "P" are planar (one buffer per
 * channel). Enumerators are numbered consecutively starting at 0 for
 * AV_SAMPLE_FMT_U8, so their order must not be changed.
 */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double

    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar
    AV_SAMPLE_FMT_S64,         ///< signed 64 bits
    AV_SAMPLE_FMT_S64P,        ///< signed 64 bits, planar

    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};

3. 平面（planar）和打包（packed）

以雙聲道為例，帶P（planar）的數據格式在存儲時，其左聲道和右聲道的數據是分開存儲的，左聲道的數據存儲在data[0]，右聲道的數據存儲在data[1]；各聲道所占用的字節數相同，由linesize[0]給出（對於音頻，FFmpeg只設置linesize[0]，不使用linesize[1]）；

不帶P（packed）的音頻數據在存儲時，是按照LRLRLR...的格式交替存儲在data[0]中，linesize[0]表示總的數據量。

4. 聲道分布（channel_layout)

聲道分布在FFmpeg\libavutil\channel_layout.h中有定義，一般來說用的比較多的是AV_CH_LAYOUT_STEREO（雙聲道）和AV_CH_LAYOUT_SURROUND（三聲道），這兩者的定義如下：

/* Stereo layout: bitwise OR of the front-left and front-right channel flags. */
#define AV_CH_LAYOUT_STEREO            (AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT)
/* Surround layout: the stereo layout plus the front-center channel flag (three channels). */
#define AV_CH_LAYOUT_SURROUND          (AV_CH_LAYOUT_STEREO|AV_CH_FRONT_CENTER)

5. 音頻幀的數據量計算

一幀音頻的數據量=channel數 * nb_samples樣本數 * 每個樣本占用的字節數

如果該音頻幀是FLTP格式的PCM數據，包含1024個樣本，雙聲道，那么該音頻幀包含的音頻數據量是2*1024*4=8192字節。

6. 音頻播放時間計算

以采樣率44100Hz來計算，每秒44100個sample，而正常一幀為1024個sample，可知每幀播放時間/1024=1000ms/44100，得到每幀播放時間=1024*1000/44100=23.2ms。

7. 音頻重采樣（resample）

FFMpeg自帶的resample例子：FFmpeg\doc\examples\resampling_audio.c，這里把最核心的resample代碼貼一下，在工程中使用時，注意設置的各種參數，給定的輸入數據都不能錯。

/**
 * Generate synthetic audio, resample it with libswresample and write the
 * raw converted samples to the file named by argv[1].
 *
 * The source is stereo / 48000 Hz / AV_SAMPLE_FMT_DBL and the destination is
 * surround / 44100 Hz / AV_SAMPLE_FMT_S16. Frames of 1024 source samples are
 * produced by fill_samples() until its time cursor reaches 10, after which
 * the resampler is flushed so buffered samples are not lost.
 *
 * @param argc must be 2
 * @param argv argv[1] is the output file path
 * @return 0 on success, 1 if any step failed (usage and fopen errors call exit(1))
 */
int main(int argc, char **argv)
{
    /* source and destination channel layouts */
    int64_t src_ch_layout = AV_CH_LAYOUT_STEREO, dst_ch_layout = AV_CH_LAYOUT_SURROUND;
    /* source and destination sample rates */
    int src_rate = 48000, dst_rate = 44100;
    uint8_t **src_data = NULL, **dst_data = NULL;
    int src_nb_channels = 0, dst_nb_channels = 0;
    int src_linesize, dst_linesize;
    int src_nb_samples = 1024, dst_nb_samples, max_dst_nb_samples;
    /* source and destination sample formats */
    enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL, dst_sample_fmt = AV_SAMPLE_FMT_S16;
    const char *dst_filename = NULL;
    FILE *dst_file;
    int dst_bufsize;
    const char *fmt;
    /* resampling context holding the conversion parameters */
    struct SwrContext *swr_ctx;
    double t;
    int ret;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s output_file\n"
                "API example program to show how to resample an audio stream with libswresample.\n"
                "This program generates a series of audio frames, resamples them to a specified "
                "output format and rate and saves them to an output file named output_file.\n",
            argv[0]);
        exit(1);
    }
    /* the resampled data is saved to a local file */
    dst_filename = argv[1];

    dst_file = fopen(dst_filename, "wb");
    if (!dst_file) {
        fprintf(stderr, "Could not open destination file %s\n", dst_filename);
        exit(1);
    }

    /* create resampler context */
    swr_ctx = swr_alloc();
    if (!swr_ctx) {
        fprintf(stderr, "Could not allocate resampler context\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    /* set options: store the conversion parameters in the resampler context */
    av_opt_set_int(swr_ctx, "in_channel_layout",    src_ch_layout, 0);
    av_opt_set_int(swr_ctx, "in_sample_rate",       src_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);

    av_opt_set_int(swr_ctx, "out_channel_layout",    dst_ch_layout, 0);
    av_opt_set_int(swr_ctx, "out_sample_rate",       dst_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);

    /* initialize the resampling context */
    if ((ret = swr_init(swr_ctx)) < 0) {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        goto end;
    }

    /* allocate source and destination samples buffers */

    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
                                             src_nb_samples, src_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate source samples\n");
        goto end;
    }

    /* compute the number of converted samples: buffering is avoided
     * ensuring that the output buffer will contain at least all the
     * converted input samples */
    max_dst_nb_samples = dst_nb_samples =
        av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);

    /* buffer is going to be directly written to a rawaudio file */
    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
                                             dst_nb_samples, dst_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate destination samples\n");
        goto end;
    }

    t = 0;
    do {
        /* generate synthetic audio; a real application would instead copy
         * decoded PCM data into src_data here */
        fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t);

        /* compute destination number of samples, including what the
         * resampler is still holding from previous calls */
        dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) +
                                        src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
        if (dst_nb_samples > max_dst_nb_samples) {
            /* grow the destination buffer */
            av_freep(&dst_data[0]);
            ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
                                   dst_nb_samples, dst_sample_fmt, 1);
            if (ret < 0) {
                /* must not fall through to the success message below */
                fprintf(stderr, "Could not allocate destination samples\n");
                goto end;
            }
            max_dst_nb_samples = dst_nb_samples;
        }

        /* convert to destination format (the actual resampling step) */
        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
        if (ret < 0) {
            fprintf(stderr, "Error while converting\n");
            goto end;
        }
        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
                                                 ret, dst_sample_fmt, 1);
        if (dst_bufsize < 0) {
            fprintf(stderr, "Could not get sample buffer size\n");
            /* propagate the error; ret still holds the positive sample count */
            ret = dst_bufsize;
            goto end;
        }
        printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);
        if (fwrite(dst_data[0], 1, dst_bufsize, dst_file) != (size_t)dst_bufsize) {
            fprintf(stderr, "Error while writing to %s\n", dst_filename);
            ret = AVERROR(EIO);
            goto end;
        }
    } while (t < 10);

    /* flush: a NULL input drains the samples still buffered in the resampler;
     * max_dst_nb_samples is the current capacity of dst_data */
    while ((ret = swr_convert(swr_ctx, dst_data, max_dst_nb_samples, NULL, 0)) > 0) {
        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
                                                 ret, dst_sample_fmt, 1);
        if (dst_bufsize < 0) {
            fprintf(stderr, "Could not get sample buffer size\n");
            ret = dst_bufsize;
            goto end;
        }
        if (fwrite(dst_data[0], 1, dst_bufsize, dst_file) != (size_t)dst_bufsize) {
            fprintf(stderr, "Error while writing to %s\n", dst_filename);
            ret = AVERROR(EIO);
            goto end;
        }
    }
    if (ret < 0) {
        fprintf(stderr, "Error while converting\n");
        goto end;
    }

    if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0)
        goto end;
    fprintf(stderr, "Resampling succeeded. Play the output file with the command:\n"
            "ffplay -f %s -channel_layout %"PRId64" -channels %d -ar %d %s\n",
            fmt, dst_ch_layout, dst_nb_channels, dst_rate, dst_filename);

end:
    /* fclose flushes buffered output, so a failure here means lost data */
    if (fclose(dst_file) != 0 && ret >= 0) {
        fprintf(stderr, "Error while writing to %s\n", dst_filename);
        ret = AVERROR(EIO);
    }

    if (src_data)
        av_freep(&src_data[0]);
    av_freep(&src_data);

    if (dst_data)
        av_freep(&dst_data[0]);
    av_freep(&dst_data);

    swr_free(&swr_ctx);
    return ret < 0;
}

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。