Abstract:
The VAD (voice activity detector) module in unimrcp has always struck me as rather crude, and the unimrcp project has not been updated by its community for a long time. The stock detector is fine for a demo if you tune its parameters by hand, but it is still a long way from production quality.
This article shows how to replace the original algorithm with the WebRTC VAD module.
[Aside: I opened this topic yesterday but could not write it up due to other commitments; here is the follow-up.]
The unimrcp VAD module lives in libs/mpf/src/mpf_activity_detector.c; its core algorithm function is shown below:
static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
{
    apr_size_t sum = 0;
    apr_size_t count = frame->codec_frame.size/2;
    const apr_int16_t *cur = frame->codec_frame.buffer;
    const apr_int16_t *end = cur + count;

    for(; cur < end; cur++) {
        if(*cur < 0) {
            sum -= *cur;
        }
        else {
            sum += *cur;
        }
    }

    return sum / count;
}
As you can see, this algorithm is extremely simple and crude: it sums the absolute sample values, takes the average, and treats the frame as voice if the average exceeds a threshold, otherwise as silence. There is no noise handling at all, so for real-world audio it is essentially unusable, as the small experiment below illustrates.
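To make the shortcoming concrete, here is a small standalone experiment (not from the original post; the frame size, amplitudes, and signals are made-up numbers chosen only for illustration). It computes the same average-magnitude "level" for a steady-noise frame and for a quiet speech-like frame; both come out around 1250-1300, so no single fixed threshold can reliably separate them.

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/* Same level metric as mpf_activity_detector_level_calculate: mean absolute amplitude. */
static unsigned int level_calculate(const short *buf, size_t count)
{
    unsigned long sum = 0;
    size_t i;
    for (i = 0; i < count; i++) {
        sum += (buf[i] < 0) ? -buf[i] : buf[i];
    }
    return (unsigned int)(sum / count);
}

int main(void)
{
    enum { N = 160 };                 /* one 10 ms frame at 16 kHz */
    const double pi = 3.14159265358979;
    short noise[N], speech[N];
    size_t i;

    for (i = 0; i < N; i++) {
        /* steady background noise, uniformly distributed in [-2500, 2500] */
        noise[i] = (short)((rand() % 5001) - 2500);
        /* quiet speech-like 300 Hz tone with peak amplitude 2000 */
        speech[i] = (short)(2000 * sin(2 * pi * 300 * i / 16000.0));
    }

    printf("noise level:  %u\n", level_calculate(noise, N));   /* roughly 1250 */
    printf("speech level: %u\n", level_calculate(speech, N));  /* roughly 1270 */
    return 0;
}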
The previous post covered the WebRTC VAD algorithm; here we use it to replace the function above. The steps are the same as those described in that WebRTC post.
static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
{
    /* number of 16-bit samples in the frame */
    apr_size_t samplesCount = frame->codec_frame.size/2;
    /* process the frame in 10 ms sub-frames */
    int per_ms_frames = 10;
    apr_size_t sampleRate = 16000;
    /* samples per 10 ms sub-frame */
    size_t samples = sampleRate * per_ms_frames / 1000;
    if (samples == 0) return -1;
    /* number of sub-frames in this frame */
    size_t nTotal = samplesCount / samples;
    int16_t *input = frame->codec_frame.buffer;

    /* create and initialize the WebRTC VAD instance */
    VadInst *vadInst = WebRtcVad_Create();
    if (vadInst == NULL) {
        return -1;
    }
    int status = WebRtcVad_Init(vadInst);
    if (status != 0) {
        WebRtcVad_Free(vadInst);
        return -1;
    }
    /* aggressiveness mode, default 1 */
    int16_t vad_mode = 1;
    status = WebRtcVad_set_mode(vadInst, vad_mode);
    if (status != 0) {
        WebRtcVad_Free(vadInst);
        return -1;
    }

    /* run the VAD over each 10 ms sub-frame and count voiced results */
    int cnt = 0;
    size_t i;
    for (i = 0; i < nTotal; i++) {
        int keep_weight = 0;
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
        if (nVadRet == -1) {
            WebRtcVad_Free(vadInst);
            return -1;
        }
        if (nVadRet >= 1) {
            cnt++;
        }
        /* debug output of the per-sub-frame VAD decision */
        printf(" %d \t", nVadRet);
        input += samples;
    }
    WebRtcVad_Free(vadInst);

    /* if voiced sub-frames < nTotal/10, treat the whole frame as silence. maybe ... */
    /* FIXME */
    if (cnt < nTotal/10) {
        return 0;
    }
    else {
        return 1;
    }
}
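One detail worth noting in the function above: it creates, initializes, and frees a VadInst for every single frame, which is wasted work at a 10 ms frame rate. A minimal sketch of a cheaper variant follows, assuming the header is named webrtc_vad.h as in the WebRTC sources; the global handle and the helper name are my own illustrative choices, not part of the original post.

#include <webrtc_vad.h>   /* assumed header name from the extracted WebRTC VAD code */

/* Illustrative: one shared, lazily created handle instead of per-frame create/free. */
static VadInst *g_vad_inst = NULL;

static VadInst* vad_instance_get(void)
{
    if (g_vad_inst == NULL) {
        g_vad_inst = WebRtcVad_Create();
        if (g_vad_inst != NULL &&
            (WebRtcVad_Init(g_vad_inst) != 0 || WebRtcVad_set_mode(g_vad_inst, 1) != 0)) {
            WebRtcVad_Free(g_vad_inst);
            g_vad_inst = NULL;
        }
    }
    return g_vad_inst;
}

With this in place, mpf_activity_detector_level_calculate would call vad_instance_get() once per frame and drop its own create/free calls, and the handle would be released once at detector teardown instead.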
Next, update the main processing function while keeping its original intermediate TRANSITION state logic:
/** Process current frame */
MPF_DECLARE(mpf_detector_event_e) mpf_activity_detector_process(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
{
    mpf_detector_event_e det_event = MPF_DETECTOR_EVENT_NONE;
    apr_size_t level = 0;
    if((frame->type & MEDIA_FRAME_TYPE_AUDIO) == MEDIA_FRAME_TYPE_AUDIO) {
        /* first, calculate current activity level of processed frame */
        level = mpf_activity_detector_level_calculate(frame);
#if 0
        apt_log(APT_LOG_MARK,APT_PRIO_INFO,"Activity Detector --------------------- [%"APR_SIZE_T_FMT"]",level);
#endif
    }

    if(detector->state == DETECTOR_STATE_INACTIVITY) {
        /* was: if(level >= detector->level_threshold) */
        if(level >= 1) {
            /* start to detect activity */
            mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY_TRANSITION);
        }
        else {
            detector->duration += CODEC_FRAME_TIME_BASE;
            if(detector->duration >= detector->noinput_timeout) {
                /* detected noinput */
                det_event = MPF_DETECTOR_EVENT_NOINPUT;
            }
        }
    }
    else if(detector->state == DETECTOR_STATE_ACTIVITY_TRANSITION) {
        /* was: if(level >= detector->level_threshold) */
        if(level >= 1) {
            detector->duration += CODEC_FRAME_TIME_BASE;
            if(detector->duration >= detector->speech_timeout) {
                /* finally detected activity */
                det_event = MPF_DETECTOR_EVENT_ACTIVITY;
                mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
            }
        }
        else {
            /* fallback to inactivity */
            mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
        }
    }
    else if(detector->state == DETECTOR_STATE_ACTIVITY) {
        /* was: if(level >= detector->level_threshold) */
        if(level >= 1) {
            detector->duration += CODEC_FRAME_TIME_BASE;
        }
        else {
            /* start to detect inactivity */
            mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY_TRANSITION);
        }
    }
    else if(detector->state == DETECTOR_STATE_INACTIVITY_TRANSITION) {
        /* was: if(level >= detector->level_threshold) */
        if(level >= 1) {
            /* fallback to activity */
            mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
        }
        else {
            detector->duration += CODEC_FRAME_TIME_BASE;
            if(detector->duration >= detector->silence_timeout) {
                /* detected inactivity */
                det_event = MPF_DETECTOR_EVENT_INACTIVITY;
                mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
            }
        }
    }

    return det_event;
}
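With level_calculate now returning a binary 0/1, the old level_threshold comparison is effectively bypassed and the state machine is driven purely by the WebRTC decision plus the existing noinput/speech/silence timeouts. For context, here is a rough sketch of how a caller typically consumes the detector events; the function name and the actions in the comments are illustrative, not from the original post.

/* Illustrative caller: feed each media frame to the detector and react to the event. */
static void demo_on_frame(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
{
    mpf_detector_event_e det_event = mpf_activity_detector_process(detector, frame);
    switch (det_event) {
        case MPF_DETECTOR_EVENT_ACTIVITY:
            /* speech started: begin forwarding audio to the recognizer */
            break;
        case MPF_DETECTOR_EVENT_INACTIVITY:
            /* speech ended: finalize the utterance */
            break;
        case MPF_DETECTOR_EVENT_NOINPUT:
            /* nothing heard within noinput_timeout: raise a no-input condition */
            break;
        default:
            /* MPF_DETECTOR_EVENT_NONE: keep going */
            break;
    }
}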
With these replacements the algorithm update is complete. You also need to adjust the related CMake configuration so that the WebRTC VAD sources and headers are built and linked in, along the lines of the sketch below.
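A minimal sketch of that build change, assuming the WebRTC VAD code has been extracted into a standalone static library: the target name mpf, the third_party paths, and the library name libwebrtcvad.a are assumptions for illustration, not the actual layout from the original post.

# Illustrative CMake fragment: make the MPF library see and link the extracted WebRTC VAD.
# Target, paths, and library name are placeholders; adjust them to the real tree layout.
target_include_directories(mpf PRIVATE ${CMAKE_SOURCE_DIR}/third_party/webrtc_vad/include)
target_link_libraries(mpf PRIVATE ${CMAKE_SOURCE_DIR}/third_party/webrtc_vad/lib/libwebrtcvad.a)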