時域特征--線性預測系數(LPC)以及LPCC


欲知更多,請關注公眾號:音頻探險記

LPC

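線性預測系數的基本思想:由於語音樣點之間存在着相關性,那么當前點/未來點可以用過去的 $p$ 個樣本點進行預測,即

$$\hat{x}(n) = \sum_{i=1}^{p} a_i\, x(n-i)$$

其中 $a_i$ 就是要求的LPC,$p$ 表示預測階數。(注意:下文的 `lpc()` 返回的是預測誤差濾波器 $A(z) = 1 - \sum_{i=1}^{p} a_i z^{-i}$ 的系數,即 $[1, -a_1, \dots, -a_p]$。)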
好處:可以得到聲道模型及其模型參數,因此廣泛用於語音識別以及語音合成中。

import librosa
import python_speech_features
import soundfile as sf
import numpy as np
def lpc(y, order):
    """Estimate linear-prediction coefficients with Burg's method.

    The returned vector holds the coefficients of the prediction-error
    filter ``A(z) = 1 + a[1] z^-1 + ... + a[order] z^-order``; ``a[0]`` is
    always 1.

    Args:
        y: 1-D sequence of samples. Integer and boolean input is promoted to
            float64 (otherwise the coefficient buffers would be integral and
            every coefficient would be truncated); floating-point input
            keeps its own precision.
        order: Number of prediction coefficients to estimate. Each iteration
            drops one sample from the error vectors, so ``order`` has to be
            smaller than the number of samples.

    Returns:
        numpy array of length ``order + 1`` in the working float dtype.

    Raises:
        FloatingPointError: If the error-energy denominator is not positive
            when an iteration starts (e.g. all-zero input).
    """
    y = np.asarray(y)
    # Integer/boolean samples would make ``dtype`` an integer type and the
    # coefficient arrays would silently round every value toward zero.
    if y.dtype.kind in "biu":
        y = y.astype(np.float64)
    dtype = y.dtype.type

    # Two buffers that swap roles every iteration: one keeps the previous
    # order's coefficients while the other receives the updated ones.
    ar_coeffs = np.zeros(order + 1, dtype=dtype)
    ar_coeffs[0] = dtype(1)
    ar_coeffs_prev = ar_coeffs.copy()

    # Forward and backward prediction errors before any coefficient is fitted.
    fwd_pred_error = y[1:]
    bwd_pred_error = y[:-1]
    den = np.dot(fwd_pred_error, fwd_pred_error) + np.dot(bwd_pred_error, bwd_pred_error)

    for i in range(order):
        if den <= 0:
            raise FloatingPointError("numerical error, input ill-conditioned?")
        reflect_coeff = dtype(-2) * np.dot(bwd_pred_error, fwd_pred_error) / dtype(den)

        ar_coeffs_prev, ar_coeffs = ar_coeffs, ar_coeffs_prev
        # new[j] = old[j] + k * old[i + 1 - j] for j = 1 .. i + 1; the reversed
        # slice old[i], old[i - 1], ..., old[0] supplies the mirrored terms.
        ar_coeffs[1:i + 2] = ar_coeffs_prev[1:i + 2] + reflect_coeff * ar_coeffs_prev[i::-1]

        # Update both error sequences from their values before this step.
        fwd_pred_error_tmp = fwd_pred_error
        fwd_pred_error = fwd_pred_error + reflect_coeff * bwd_pred_error
        bwd_pred_error = bwd_pred_error + reflect_coeff * fwd_pred_error_tmp

        # Update the denominator recursively, removing the two edge samples
        # that are about to be trimmed from the error vectors.
        q = dtype(1) - reflect_coeff ** 2
        den = q * den - bwd_pred_error[-1] ** 2 - fwd_pred_error[0] ** 2

        fwd_pred_error = fwd_pred_error[1:]
        bwd_pred_error = bwd_pred_error[:-1]
    return ar_coeffs
# Load the demo recording: sf.read returns the samples and the sample rate.
y, sr = sf.read('q1.wav')
# Frame bookkeeping for a 160-sample frame length.
# NOTE(review): frame_size and num_frames are not used below - the LPC is
# computed once over the whole signal, not per frame.
frame_size = 160
num_frames = len(y) // frame_size
# Print the order-32 coefficient vector (33 values, the first one being 1).
print(lpc(y, 32))
'''
[ 1.00000000e+00 -3.95327600e+00  8.29868847e+00 -1.27752183e+01
  1.60320420e+01 -1.71512784e+01  1.59802135e+01 -1.29520778e+01
  8.83717438e+00 -4.58646820e+00  8.91619704e-01  1.80827086e+00
 -3.30606685e+00  3.68847432e+00 -3.13823922e+00  2.08868507e+00
 -1.04485702e+00  1.47865339e-01  6.35567557e-01 -1.15391128e+00
  1.35048967e+00 -1.27918423e+00  9.65718801e-01 -5.09474786e-01
  5.94380366e-03  4.28867366e-01 -7.08129489e-01  8.19126446e-01
 -7.55779509e-01  5.73570390e-01 -3.63595930e-01  1.78320700e-01
 -4.54597679e-02]
'''

LPCC

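LPCC全稱線性預測倒譜系數(linear predictive cepstral coefficient, LPCC),可以在計算得到LPC后,按照如下的遞推公式計算得到LPCC:

$$c_1 = a_1$$

$$c_m = a_m + \sum_{k=1}^{m-1} \frac{k}{m}\, c_k\, a_{m-k}, \quad 1 < m \le p$$

$$c_m = \sum_{k=m-p}^{m-1} \frac{k}{m}\, c_k\, a_{m-k}, \quad m > p$$

其中 $a_k$ 是預測器 $\hat{x}(n) = \sum_{k=1}^{p} a_k\, x(n-k)$ 的系數,$p$ 為LPC階數。注意 `lpc()` 返回的是 $[1, -a_1, \dots, -a_p]$,代入上式前需要先取反。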
LPCC是LPC系數在倒譜域的表示,計算量小易於實現,對元音的描述能力較好,對輔音的描述能力較差,抗噪性能差[1]
[1] 數字語音處理及MATLAB仿真
相應代碼如下

import librosa
import python_speech_features
import soundfile as sf
import numpy as np
def lpc(y, order):
    """Estimate linear-prediction coefficients with Burg's method.

    The returned vector holds the coefficients of the prediction-error
    filter ``A(z) = 1 + a[1] z^-1 + ... + a[order] z^-order``; ``a[0]`` is
    always 1.

    Args:
        y: 1-D sequence of samples. Integer and boolean input is promoted to
            float64 (otherwise the coefficient buffers would be integral and
            every coefficient would be truncated); floating-point input
            keeps its own precision.
        order: Number of prediction coefficients to estimate. Each iteration
            drops one sample from the error vectors, so ``order`` has to be
            smaller than the number of samples.

    Returns:
        numpy array of length ``order + 1`` in the working float dtype.

    Raises:
        FloatingPointError: If the error-energy denominator is not positive
            when an iteration starts (e.g. all-zero input).
    """
    y = np.asarray(y)
    # Integer/boolean samples would make ``dtype`` an integer type and the
    # coefficient arrays would silently round every value toward zero.
    if y.dtype.kind in "biu":
        y = y.astype(np.float64)
    dtype = y.dtype.type

    # Two buffers that swap roles every iteration: one keeps the previous
    # order's coefficients while the other receives the updated ones.
    ar_coeffs = np.zeros(order + 1, dtype=dtype)
    ar_coeffs[0] = dtype(1)
    ar_coeffs_prev = ar_coeffs.copy()

    # Forward and backward prediction errors before any coefficient is fitted.
    fwd_pred_error = y[1:]
    bwd_pred_error = y[:-1]
    den = np.dot(fwd_pred_error, fwd_pred_error) + np.dot(bwd_pred_error, bwd_pred_error)

    for i in range(order):
        if den <= 0:
            raise FloatingPointError("numerical error, input ill-conditioned?")
        reflect_coeff = dtype(-2) * np.dot(bwd_pred_error, fwd_pred_error) / dtype(den)

        ar_coeffs_prev, ar_coeffs = ar_coeffs, ar_coeffs_prev
        # new[j] = old[j] + k * old[i + 1 - j] for j = 1 .. i + 1; the reversed
        # slice old[i], old[i - 1], ..., old[0] supplies the mirrored terms.
        ar_coeffs[1:i + 2] = ar_coeffs_prev[1:i + 2] + reflect_coeff * ar_coeffs_prev[i::-1]

        # Update both error sequences from their values before this step.
        fwd_pred_error_tmp = fwd_pred_error
        fwd_pred_error = fwd_pred_error + reflect_coeff * bwd_pred_error
        bwd_pred_error = bwd_pred_error + reflect_coeff * fwd_pred_error_tmp

        # Update the denominator recursively, removing the two edge samples
        # that are about to be trimmed from the error vectors.
        q = dtype(1) - reflect_coeff ** 2
        den = q * den - bwd_pred_error[-1] ** 2 - fwd_pred_error[0] ** 2

        fwd_pred_error = fwd_pred_error[1:]
        bwd_pred_error = bwd_pred_error[:-1]
    return ar_coeffs
y, sr = sf.read('q1.wav')
# LPC order p and the number of cepstral coefficients to produce. The order
# is defined once so the lpc() call and the recursion cannot drift apart.
lpc_order = 32
lpcc_order = 48
# lpc() returns the error-filter polynomial [1, a_1, ..., a_p], i.e. the
# all-pole model is 1 / A(z) with A(z) = 1 + a_1 z^-1 + ... + a_p z^-p.
lpc_coeff = lpc(y, lpc_order)
lpcc_coeff = np.zeros(lpcc_order)
# Slot 0 keeps the value the original script stored there (a_0 = 1). It never
# enters the recursion because every term is weighted by k / m with k >= 1.
lpcc_coeff[0] = lpc_coeff[0]
# Cepstrum of 1 / A(z):
#   c_m = -a_m - sum_{k=1}^{m-1} (k / m) * c_k * a_{m-k}      for 1 <= m <= p
#   c_m =      - sum_{k=m-p}^{m-1} (k / m) * c_k * a_{m-k}    for m > p
# The minus signs come from the error-filter sign convention of lpc(); the
# sum must be accumulated over all k, and a_m exists up to and including m = p.
# NOTE: the sample output quoted after this script was produced by the earlier
# recursion (values growing to ~1e29) and no longer matches this code.
for m in range(1, lpcc_order):
    if m <= lpc_order:
        lpcc_coeff[m] = -lpc_coeff[m]
    for k in range(max(1, m - lpc_order), m):
        lpcc_coeff[m] -= lpcc_coeff[k] * lpc_coeff[m - k] * k / m
print(lpcc_coeff)
print(lpc_coeff)
'''
[ 1.00000000e+00 -3.95327600e+00  1.61128841e+01 -5.52410036e+01
  1.79819243e+02 -5.85851356e+02  1.94600697e+03 -6.60704007e+03
  2.28633585e+04 -8.03469568e+04  2.85871218e+05 -1.02738713e+06
  3.72307949e+06 -1.35861755e+07  4.98734770e+07 -1.84019377e+08
  6.82011923e+08 -2.53758247e+09  9.47444366e+09 -3.54837702e+10
  1.33263280e+11 -5.01739551e+11  1.89335516e+12 -7.15952267e+12
  2.71242538e+13 -1.02940475e+14  3.91300106e+14 -1.48962409e+15
  5.67857747e+15 -2.16748811e+16  8.28305613e+16 -3.16889100e+17
  2.21650898e+18 -1.23415673e+19  6.99397095e+19 -3.97121614e+20
  2.25665287e+21 -1.28331886e+22  7.30333576e+22 -4.15919083e+23
  2.37018398e+24 -1.35153342e+25  7.71134177e+25 -4.40229745e+26
  2.51457017e+27 -1.43705211e+28  8.21666908e+28 -4.70028689e+29]
'''


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM