#讀取信號為數組

def wavread(filename):
    """Read a 16-bit PCM WAV file into a peak-normalised float array.

    Parameters
    ----------
    filename : str or file-like object
        Passed unchanged to ``wave.open`` in ``'rb'`` mode.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(frames, nchannels)``, scaled so that the
        largest absolute sample equals 1.0.  A file containing only zeros
        is returned as zeros instead of dividing by zero.

    Raises
    ------
    ValueError
        If the file does not store samples as 2-byte (16-bit) integers.
    """
    # The context manager closes the file even if reading raises.
    with wave.open(filename, 'rb') as f:
        nchannels = f.getnchannels()
        sampwidth = f.getsampwidth()
        raw = f.readframes(f.getnframes())

    if sampwidth != 2:
        raise ValueError(
            "wavread only supports 16-bit PCM data, got sample width "
            "of %d byte(s)" % sampwidth
        )

    # WAV PCM samples are little-endian.  np.frombuffer replaces the
    # deprecated binary mode of np.fromstring.  Converting to float before
    # taking abs() avoids the int16 overflow of abs(-32768).
    waveData = np.frombuffer(raw, dtype='<i2').astype(np.float64)

    peak = np.abs(waveData).max() if waveData.size else 0.0
    if peak > 0:
        waveData /= peak  # normalise amplitude to [-1, 1]

    # Infer the frame count from the data actually read rather than from
    # the header, so a mismatching header cannot break the reshape.
    return waveData.reshape(-1, nchannels)
#音量計算

# Method 1: volume as the sum of absolute amplitudes.
def calVolume(waveData, frameSize, overLap):
    """Compute a per-frame volume curve from a waveform.

    The waveform is cut into frames of ``frameSize`` samples whose start
    positions are ``frameSize - overLap`` samples apart; the final frames
    may be shorter because they are clipped at the end of the data.  Each
    frame is shifted by its median and the absolute values are summed.

    Parameters
    ----------
    waveData : numpy.ndarray
        Samples, indexed along the first axis.
    frameSize : int
        Number of samples per frame.
    overLap : int
        Number of samples shared by consecutive frames.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(frame_count, 1)`` holding one volume per frame.
    """
    total = len(waveData)
    hop = frameSize - overLap
    frame_count = int(math.ceil(total * 1.0 / hop))
    volume = np.zeros((frame_count, 1))
    for k in range(frame_count):
        begin = k * hop
        end = min(begin + frameSize, total)  # clip the last frame(s)
        frame = waveData[begin:end]
        # Remove the frame's median so a constant offset adds no volume.
        volume[k] = np.abs(frame - np.median(frame)).sum()
    return volume
#分幀信號

def enframe(signal, nw, inc):
    """Split a 1-D audio signal into frames, zero-padding the tail.

    Parameters
    ----------
    signal : array-like
        The original 1-D audio signal.
    nw : int
        Length of each frame in samples (sampling rate times frame
        duration).
    inc : int
        Distance in samples between the starts of neighbouring frames.

    Returns
    -------
    numpy.ndarray
        Matrix with one frame per row and ``nw`` columns.  No window
        function is applied to the frames.
    """
    n_samples = len(signal)

    # A signal no longer than one frame yields exactly one (padded) frame;
    # otherwise count how many frames are needed to cover every sample.
    if n_samples > nw:
        nf = int(np.ceil((1.0 * n_samples - nw + inc) / inc))
    else:
        nf = 1

    # Total length once all frames are laid end to end with the given hop;
    # the shortfall is filled with zeros.
    padded_len = int((nf - 1) * inc + nw)
    padding = np.zeros((padded_len - n_samples,))
    padded = np.concatenate((signal, padding))

    # Row r, column c of the index matrix is (start of frame r) + c.
    within_frame = np.arange(0, nw)
    frame_starts = np.arange(0, nf * inc, inc)
    indices = within_frame[np.newaxis, :] + frame_starts[:, np.newaxis]
    indices = indices.astype(np.int32)

    return padded[indices]
#端點檢測,通道(閾值)計算方法

def findIndex(vol, thres):
    """Locate the positions where a volume curve crosses a threshold.

    Position ``i`` is reported when ``vol[i]`` and ``vol[i + 1]`` lie
    strictly on opposite sides of ``thres``.

    Parameters
    ----------
    vol : array-like
        Volume curve, either 1-D or a single-column 2-D array.
    thres : float
        Threshold level.

    Returns
    -------
    numpy.ndarray
        int64 array with the crossing positions in ascending order,
        followed by zero padding.  For backward compatibility the array
        always has at least 500 entries; it grows beyond that when more
        than 500 crossings are found instead of raising ``IndexError``.

    Raises
    ------
    ValueError
        If ``vol`` is neither 1-D nor a single-column 2-D array.
    """
    min_len = 500  # legacy fixed length of the returned buffer

    levels = np.asarray(vol, dtype=np.float64)
    if levels.ndim == 2 and levels.shape[1] == 1:
        levels = levels[:, 0]
    if levels.ndim != 1:
        raise ValueError("vol must be 1-D or a single-column 2-D array")

    # A strictly negative product of neighbouring offsets means the two
    # samples sit on opposite sides of the threshold.
    offset = levels - thres
    crossings = np.flatnonzero(offset[:-1] * offset[1:] < 0)

    # int64 rather than int16 so positions above 32767 are stored intact.
    index = np.zeros(max(min_len, crossings.size), dtype=np.int64)
    index[:crossings.size] = crossings
    return index
#基頻(音高)計算

# Autocorrelation function, used to estimate the fundamental frequency.
def ACF(frame):
    """Compute the autocorrelation of a frame for every non-negative lag.

    Parameters
    ----------
    frame : numpy.ndarray
        Samples of a single frame.

    Returns
    -------
    numpy.ndarray
        Float array of length ``len(frame)``; entry ``k`` is the sum of
        products of the frame with itself shifted by ``k`` samples.
    """
    n = len(frame)
    result = np.zeros(n)
    for lag in range(n):
        overlap = n - lag  # number of sample pairs available at this lag
        result[lag] = np.sum(frame[lag:] * frame[:overlap])
    return result