python 音频可视化


  代码整理好放在 github 上了: https://github.com/darkchii/visualize

  bilibili 演示视频:https://www.bilibili.com/video/av77372866

  2020-07-18 11:50:05 更新一个线条颜色渐变的方法:

import matplotlib.pyplot as plt
import numpy as np
import pyaudio
from _tkinter import TclError
from pydub import AudioSegment
from matplotlib.animation import FuncAnimation
from matplotlib.collections import LineCollection


# Decode the file with pydub and keep only the first (left) channel, which is
# both played back and drawn.
p = pyaudio.PyAudio()
sound = AudioSegment.from_file(file='F:/Music/中原めいこ - Cloudyな午后.mp3')
left = sound.split_to_mono()[0]
fs = left.frame_rate
size = len(left.get_array_of_samples())
channels = left.channels
stream = p.open(
    # Take the sample format from the decoded segment rather than assuming
    # 2-byte samples; update() writes the segment's raw bytes to this stream.
    format=p.get_format_from_width(left.sample_width),
    channels=1,
    rate=50000,  # intentionally not `fs`: changes the playback speed
    # input=True,
    output=True,
)
stream.start_stream()
fig = plt.figure()
ax = fig.gca(
    # projection='polar'
)
# Colour each line segment by its amplitude, mapped from [-1, 1] onto the
# 'gist_ncar' colormap.
norm2 = plt.Normalize(-1., 1.)
lc = LineCollection([], cmap='gist_ncar', norm=norm2)
ax.set_ylim(-1.5, 1.5)
ax.set_axis_off()
# Number of samples shown (and played) per animation frame: 20 ms of audio.
window = int(0.02*fs)
freq = np.linspace(20, 20000, window // 2)
time = np.linspace(0, 20, window)
# The collection is empty when it is added, so it cannot drive autoscaling;
# set the x-view explicitly so the whole 0..20 time axis is visible.
ax.set_xlim(0, 20)
ax.add_collection(lc)


def update(frames):
    """Play one window of audio and redraw it as a colour-mapped polyline.

    Args:
        frames: Index of the first sample of the window, as supplied by
            ``FuncAnimation`` from ``range(0, size, window)``.

    Returns:
        A one-element tuple holding the line collection, as required for
        blitting.
    """
    if stream.is_active():
        segment = left.get_sample_slice(frames, frames + window)
        stream.write(segment.raw_data)
        # Rough normalization of the sample values.
        y = np.array(segment.get_array_of_samples()) / 30000
        if len(y) < window:
            # The last window of the file is usually shorter than `window`;
            # pad with silence so it still lines up with the `time` axis.
            y = np.concatenate([y, np.zeros(window - len(y))])
        # Build (window - 1) segments, each joining two consecutive points.
        points = np.array([time, y]).T.reshape(-1, 1, 2)
        segments = np.concatenate([points[:-1], points[1:]], axis=1)
        lc.set_segments(segments)
        # The amplitude doubles as the value that is colour-mapped.
        lc.set_array(y)

    return lc,


# Each frame value is the start index of the next window of samples;
# interval=0 requests no additional delay between frames.
ani = FuncAnimation(
    fig,
    update,
    frames=range(0, size, window),
    interval=0,
    blit=True,
)
plt.show()

  运行截图:

  2020-03-23 13:23:01 还是给一个 pydub(需要自己配置好ffmpeg)正确使用姿势,因为 mp3 格式太常见:

import numpy as np
import pyaudio
from pydub import AudioSegment, effects
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation


# Decode the file with pydub and keep the first channel only.
p = pyaudio.PyAudio()
sound = AudioSegment.from_file(file='../xxx.mp3')
left = sound.split_to_mono()[0]
fs = left.frame_rate
channels = left.channels
size = len(left.get_array_of_samples())

# Output stream whose format, channel count and rate mirror the mono segment.
stream = p.open(
    format=p.get_format_from_width(left.sample_width),
    channels=channels,
    rate=fs,
    output=True,
)
stream.start_stream()

# One animation frame covers 20 ms of audio.
window = int(fs * 0.02)
f = np.linspace(20, 20 * 1000, window // 2)
t = np.linspace(0, 20, window)

# Top axes: magnitude spectrum; bottom axes: waveform. Both are drawn
# without axis decorations.
fig = plt.figure()
ax1, ax2 = fig.subplots(2, 1)
for axis, limits in ((ax1, (0, 0.5)), (ax2, (-1.5, 1.5))):
    axis.set_ylim(*limits)
    axis.set_axis_off()

lf1, = ax1.plot(f, np.zeros(window // 2), lw=1)
lf2, = ax2.plot(t, np.zeros(window), lw=1)


def update(frames):
    """Play one window of audio and refresh the spectrum and waveform lines.

    Args:
        frames: Index of the first sample of the window, as supplied by
            ``FuncAnimation`` from ``range(0, size, window)``.

    Returns:
        The two line artists (spectrum, waveform) for blitting.
    """
    if stream.is_active():
        segment = left.get_sample_slice(frames, frames + window)
        stream.write(segment.raw_data)
        # Rough normalization of the sample values.
        y = np.array(segment.get_array_of_samples()) / 30000
        if len(y) < window:
            # The final window is usually short; pad with silence so both
            # lines keep the same length as their fixed x data.
            y = np.concatenate([y, np.zeros(window - len(y))])
        yft = np.abs(np.fft.fft(y)) / (window // 2)

        # Only the first half of the FFT output is plotted.
        lf1.set_ydata(yft[:window // 2])
        lf2.set_ydata(y)

    return lf1, lf2,


# Step through the samples one window at a time, with no extra delay
# requested between frames.
window_starts = range(0, size, window)
ani = FuncAnimation(fig, update, frames=window_starts, interval=0, blit=True)
plt.show()

  截图:

  2020-02-25 14:50:27 Animation:

  注:`p.open()` 的 `format` 参数是 PortAudio 的采样格式常量(paFloat32=1、paInt32=2、paInt24=4、paInt16=8、paInt8=16、paUInt8=32);把它改成与音频实际位深不一致的取值,数据就会被按另一种格式解读,播放效果会"有惊喜"。

  极坐标版:

import matplotlib.pyplot as plt
from scipy.signal import detrend
# from scipy.fftpack import fft
import numpy as np
import pyaudio
from _tkinter import TclError
import struct
import wave
# import librosa
from pydub import AudioSegment
from matplotlib.animation import FuncAnimation


# Number of frames read from the WAV file (and played) per animation step.
chunk = 1024
p = pyaudio.PyAudio()
# sound = AudioSegment.from_file(file='../Music/xxx.mp3')
# rdata = sound.get_array_of_samples()
wf = wave.open('../Music/xxx.wav')
stream = p.open(
    format=pyaudio.paInt16,  # named constant for the former literal 8
    channels=wf.getnchannels(),
    rate=wf.getframerate(),
    # input=True,
    output=True,
    # frames_per_buffer=chunk
)

fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current Matplotlib
# releases, so the polar axes are created explicitly.
ax = fig.add_subplot(111, projection='polar')
# ax.set_ylim(0, 1)
ax.set_axis_off()
# One point per plotted spectrum bin, spread over a full turn.
lf, = ax.plot(np.linspace(0, 2 * np.pi, chunk), np.zeros(chunk), lw=1)


def init():
    """Start audio playback and return the line artist for blitting."""
    stream.start_stream()
    artists = (lf,)
    return artists


def update(frame):
    """Play the next chunk of the WAV file and redraw its spectrum.

    Args:
        frame: Frame counter supplied by ``FuncAnimation``; unused.

    Returns:
        A one-element tuple holding the line artist, for blitting.
    """
    if not stream.is_active():
        return lf,

    data = wf.readframes(chunk)
    # Size of a full read in bytes, derived from the file instead of assuming
    # 4 bytes per frame.
    expected = chunk * wf.getsampwidth() * wf.getnchannels()
    if len(data) < expected:
        # End of file: play whatever is left, then stop the stream instead of
        # letting struct.unpack() fail on the short buffer.
        if data:
            stream.write(data)
        stream.stop_stream()
        return lf,

    stream.write(data)
    # The raw bytes (not the decoded samples) are what gets analysed.
    data_int = struct.unpack(str(expected) + 'B', data)
    y_detrend = detrend(data_int)
    yft = np.abs(np.fft.fft(y_detrend))
    y_vals = yft[:chunk] / (chunk * chunk * 4)
    # Emphasize the strongest bins: everything above the midpoint between the
    # smallest and largest value is doubled.
    ind = np.where(y_vals > (np.max(y_vals) + np.min(y_vals)) / 2)
    y_vals[ind[0]] *= 2
    lf.set_ydata(y_vals)
    return lf,


# frames=None: the animation keeps calling update() with a running counter;
# init() starts the audio stream.
ani = FuncAnimation(
    fig,
    update,
    frames=None,
    init_func=init,
    interval=0,
    blit=True,
)
plt.show()

  截图:

  stem 版:

import matplotlib.pyplot as plt
from scipy.signal import detrend
# from scipy.fftpack import fft
import numpy as np
import pyaudio
from _tkinter import TclError
import struct
import wave
# import librosa
from pydub import AudioSegment
from matplotlib.animation import FuncAnimation


# Frames read from the WAV file per animation step.
chunk = 1024

p = pyaudio.PyAudio()
wf = wave.open('../Music/xxx.wav')
stream = p.open(
    format=8,
    channels=wf.getnchannels(),
    rate=wf.getframerate(),
    output=True,
)

fig = plt.figure()
ax = fig.gca()
ax.set_ylim(0, 1)
ax.set_axis_off()

# Stem plot with one stem per spectrum bin; only the marker line is recoloured
# here.
# NOTE(review): `use_line_collection` may be unavailable in recent Matplotlib
# releases - confirm against the installed version.
stem_x = np.linspace(20, 20000, chunk)
stem_y = np.zeros(chunk)
lf = ax.stem(stem_x, stem_y, basefmt=':', use_line_collection=True)
lf.markerline.set_color([0.8, 0.2, 0, 0.5])


def init():
    """Start audio playback and return the stem container for blitting."""
    stream.start_stream()
    container = lf
    return container


def update(frame):
    """Play the next chunk of the WAV file and move the stem markers.

    Args:
        frame: Frame counter supplied by ``FuncAnimation``; unused.

    Returns:
        The stem container created at module level, for blitting.
    """
    if not stream.is_active():
        return lf

    data = wf.readframes(chunk)
    # Size of a full read in bytes, derived from the file instead of assuming
    # 4 bytes per frame.
    expected = chunk * wf.getsampwidth() * wf.getnchannels()
    if len(data) < expected:
        # End of file: play whatever is left, then stop the stream instead of
        # letting struct.unpack() fail on the short buffer.
        if data:
            stream.write(data)
        stream.stop_stream()
        return lf

    stream.write(data)
    # The raw bytes (not the decoded samples) are what gets analysed.
    data_int = struct.unpack(str(expected) + 'B', data)
    y_detrend = detrend(data_int)
    yft = np.abs(np.fft.fft(y_detrend))
    y_vals = yft[:chunk] / (chunk * chunk)
    # Emphasize the strongest bins: everything above the midpoint between the
    # smallest and largest value is multiplied by 4.
    ind = np.where(y_vals > (np.max(y_vals) + np.min(y_vals)) / 2)
    y_vals[ind[0]] *= 4
    # Only the marker line is updated; the stems themselves are left as drawn.
    lf.markerline.set_ydata(y_vals)
    return lf


# Run until the window is closed; init() starts the audio stream.
animation_options = dict(frames=None, init_func=init, interval=0, blit=True)
ani = FuncAnimation(fig, update, **animation_options)
plt.show()

  semilogx 版:

import matplotlib.pyplot as plt
from scipy.signal import detrend
# from scipy.fftpack import fft
import numpy as np
import pyaudio
from _tkinter import TclError
import struct
import wave
# import librosa
from pydub import AudioSegment
from matplotlib.animation import FuncAnimation


# Frames read from the WAV file per animation step.
chunk = 1024

p = pyaudio.PyAudio()
wf = wave.open('../Music/xxx.wav')
stream = p.open(
    format=8,
    channels=wf.getnchannels(),
    rate=wf.getframerate(),
    output=True,
)

# Single undecorated axes with a logarithmic x axis.
fig = plt.figure()
ax = fig.gca()
ax.set_ylim(0, 1)
ax.set_axis_off()

x_grid = np.linspace(20, 20000, chunk)
lf, = ax.semilogx(x_grid, np.zeros(chunk), lw=1, color='lightblue')


def init():
    """Start audio playback and return the line artist for blitting."""
    stream.start_stream()
    return (lf,)


def update(frame):
    """Play the next chunk of the WAV file and redraw its spectrum line.

    Args:
        frame: Frame counter supplied by ``FuncAnimation``; unused.

    Returns:
        A one-element tuple holding the line artist, for blitting.
    """
    if not stream.is_active():
        return lf,

    data = wf.readframes(chunk)
    # Size of a full read in bytes, derived from the file instead of assuming
    # 4 bytes per frame.
    expected = chunk * wf.getsampwidth() * wf.getnchannels()
    if len(data) < expected:
        # End of file: play whatever is left, then stop the stream instead of
        # letting struct.unpack() fail on the short buffer.
        if data:
            stream.write(data)
        stream.stop_stream()
        return lf,

    stream.write(data)
    # The raw bytes (not the decoded samples) are what gets analysed.
    data_int = struct.unpack(str(expected) + 'B', data)
    y_detrend = detrend(data_int)
    yft = np.abs(np.fft.fft(y_detrend))
    y_vals = yft[:chunk] / (chunk * chunk)
    # Emphasize the strongest bins: everything above the midpoint between the
    # smallest and largest value is multiplied by 4.
    ind = np.where(y_vals > (np.max(y_vals) + np.min(y_vals)) / 2)
    y_vals[ind[0]] *= 4
    lf.set_ydata(y_vals)
    return lf,


# Endless animation: update() is called repeatedly with a running counter and
# init() starts the audio stream.
ani = FuncAnimation(
    fig, update,
    frames=None, init_func=init,
    interval=0, blit=True,
)
plt.show()

  2020-02-25 12:18:49 更新一下:

  这次更新给出一段比较勉强的代码:通过音频数据播放音乐,同时展示 fft 效果。实际上仅仅播放音乐很简单,wav 波形文件只需要 wave 模块即可;如果想支持 mp3 或其他更常见的音频格式,使用 pydub 包更好。(更新:使用 pydub 不需要手动解包,通过 get_array_of_samples() 就可以获取采样数据;raw_data 是字节数组,用于写入输出流。)

  但还是给个代码:

import matplotlib.pyplot as plt
from vispy.plot import Fig
from matplotlib.colors import LightSource
from matplotlib import cm
from scipy.signal import detrend
from scipy.fftpack import fftn
import numpy as np
import pyaudio
from _tkinter import TclError
import struct
import wave
# import librosa
import array
from pydub import AudioSegment
from pydub.utils import get_array_type


# Frames read from the WAV file per loop iteration.
chunk = 1024

p = pyaudio.PyAudio()

wf = wave.open('../Music/1563833285950.wav')
stream = p.open(
    format=p.get_format_from_width(wf.getsampwidth()),
    channels=wf.getnchannels(),
    rate=wf.getframerate(),
    # input=True,
    output=True,
    # frames_per_buffer=chunk
)
stream.start_stream()
freq = np.linspace(20, 20000, chunk)
yf = np.zeros(chunk)
fig = plt.figure()
ax = fig.gca()
lf, = ax.semilogx(freq, yf, lw=1, color='lightblue')
ax.set_ylim(0, 1)
ax.set_axis_off()
plt.show(block=False)

# readframes() returns bytes, so a full read is `chunk` frames times the size
# of one frame, not `chunk` bytes.
bytes_per_read = chunk * wf.getsampwidth() * wf.getnchannels()

start = 0
try:
    while stream.is_active():
        data = wf.readframes(chunk)
        if len(data) < bytes_per_read:
            # End of file: the trailing partial chunk is dropped, and the
            # short buffer never reaches struct.unpack().
            break
        stream.write(data)

        # The raw bytes (not the decoded samples) are what gets analysed.
        data_int = struct.unpack(str(bytes_per_read) + 'B', data)

        y_detrend = detrend(data_int)
        yft = np.abs(fftn(y_detrend))
        y_vals = yft[:chunk] / (128 * chunk)

        lf.set_ydata(y_vals)

        try:
            ax.figure.canvas.draw()
            ax.figure.canvas.flush_events()
        except TclError:
            # The window was closed while drawing; stop the loop.
            break
finally:
    # Release the audio device and the file however the loop ended.
    stream.stop_stream()
    stream.close()
    wf.close()
    p.terminate()

  给个 pydub 将数据合成到包的源码截图:

  2019-12-31 15:45:34 再来一个:
  极坐标版:

import matplotlib.pyplot as plt
from matplotlib.colors import LightSource
from matplotlib import cm
from scipy.signal import detrend
import numpy as np
import pyaudio
from _tkinter import TclError
import struct


# Microphone capture settings: mono, 48 kHz, 2048 frames per read.
channels = 1
rate = 48000
chunk = 2048
p = pyaudio.PyAudio()
stream = p.open(
    format=pyaudio.paInt16,
    channels=channels,
    rate=rate,
    input=True,
    frames_per_buffer=chunk
)
stream.start_stream()
theta = np.linspace(0.0, rate, chunk)
radii = np.zeros(chunk)
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current Matplotlib
# releases, so the polar axes are created explicitly.
ax = fig.add_subplot(111, projection='polar')
lf = ax.stem(theta, radii, basefmt=':', use_line_collection=True)
lf.markerline.set_color([0.8, 0.2, 0, 0.5])
ax.set_rorigin(0)
ax.set_axis_off()
plt.show(block=False)
while stream.is_active():
    data = stream.read(chunk)
    # Two bytes per 16-bit mono frame; the raw bytes are what gets analysed.
    data_int = struct.unpack(str(chunk * 2) + 'B', data)
    y_detrend = detrend(data_int)
    yft = np.abs(np.fft.fft(y_detrend))
    y_vals = yft[:chunk] / (128 * chunk)
    # Only the marker line is updated; the stems are left as drawn.
    lf.markerline.set_ydata(y_vals)

    try:
        ax.figure.canvas.draw()
        ax.figure.canvas.flush_events()
    except TclError:
        # The window was closed: release the stream and stop.
        stream.stop_stream()
        stream.close()
        break

  笛卡尔直角坐标版:

import matplotlib.pyplot as plt
from matplotlib.colors import LightSource
from matplotlib import cm
from scipy.signal import detrend
import numpy as np
import pyaudio
from _tkinter import TclError
import struct


# Microphone capture settings: mono, 48 kHz, 2048 frames per read.
channels = 1
rate = 48000
chunk = 2048

p = pyaudio.PyAudio()
stream = p.open(
    format=pyaudio.paInt16,
    channels=channels,
    rate=rate,
    input=True,
    frames_per_buffer=chunk,
)
stream.start_stream()

# Stem plot on ordinary (Cartesian) axes, one stem per plotted bin.
theta = np.linspace(0.0, rate, chunk)
radii = np.zeros(chunk)
fig = plt.figure()
ax = fig.gca()
lf = ax.stem(theta, radii, basefmt=':', use_line_collection=True)
lf.markerline.set_color([0.8, 0.2, 0, 0.5])
ax.set_ylim(0, 1)
ax.set_axis_off()
plt.show(block=False)

# Two bytes per 16-bit mono frame.
byte_format = '%dB' % (chunk * 2)
while stream.is_active():
    raw = stream.read(chunk)
    byte_values = struct.unpack(byte_format, raw)
    magnitude = np.abs(np.fft.fft(detrend(byte_values)))
    y_vals = magnitude[:chunk] / (128 * chunk)
    lf.markerline.set_ydata(y_vals)

    canvas = ax.figure.canvas
    try:
        canvas.draw()
        canvas.flush_events()
    except TclError:
        # The window was closed: release the stream and stop.
        stream.stop_stream()
        stream.close()
        break

  运行截图:

  2019-12-30 14:05:57 再来更新一个版本(可惜我的电脑跑起来非常卡):

import struct
from _tkinter import TclError

import matplotlib.pyplot as plt
import numpy as np
import pyaudio
from scipy.signal import detrend


# Microphone capture settings: mono, 48 kHz, 2048 frames per read.
channels = 1
rate = 48000
chunk = 2048

p = pyaudio.PyAudio()
stream = p.open(
    format=pyaudio.paInt16,
    channels=channels,
    rate=rate,
    input=True,
    frames_per_buffer=chunk
)

stream.start_stream()
# One bar per plotted bin, evenly spaced around the circle.
theta = np.linspace(0.0, 2. * np.pi, chunk, endpoint=False)
radii = np.zeros(chunk)
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current Matplotlib
# releases, so the polar axes are created explicitly.
ax = fig.add_subplot(111, projection='polar')
lf = ax.bar(x=theta, height=radii, width=0.02, bottom=-2.0, alpha=0.5)
ax.set_axis_off()
plt.show(block=False)

while stream.is_active():
    data = stream.read(chunk)
    # Two bytes per 16-bit mono frame; the raw bytes are what gets analysed.
    data_int = struct.unpack(str(chunk * 2) + 'B', data)
    y_detrend = detrend(data_int)
    yft = np.abs(np.fft.fft(y_detrend))
    y_vals = yft[:chunk] / (128 * chunk)

    # Resize and recolour every bar individually; the colour comes from the
    # Spectral colormap evaluated at 8x the bar height.
    for rect, y_val, color in zip(lf.patches, y_vals, plt.cm.Spectral(y_vals * 8)):
        rect.set_height(y_val)
        rect.set_facecolor(color)

    try:
        ax.figure.canvas.draw()
        ax.figure.canvas.flush_events()
    except TclError:
        # The window was closed: release the stream and stop.
        stream.stop_stream()
        stream.close()
        break

  2019-11-28 20:58:07 更新,围绕在圆环上的2d 版(一开始sb了,直接加在 y 轴上 = =,幸好马上意识到需要做向量旋转):

from _tkinter import TclError
import pyaudio
import numpy as np
import matplotlib.pyplot as plt
import struct
from scipy.signal import savgol_filter, detrend, lfilter


# Microphone capture settings: mono, 48 kHz, 2048 frames per read.
channels = 1
rate = 48000
chunk = 1024 * 2

p = pyaudio.PyAudio()
stream = p.open(
    format=pyaudio.paInt16,
    channels=channels,
    rate=rate,
    input=True,
    frames_per_buffer=chunk,
)
stream.start_stream()

# Base circle of radius R, sampled at one point per byte of a read.
R = 2
t = np.linspace(0, 2*np.pi, chunk * 2)
xf = R*np.cos(t)
yf = R*np.sin(t)

fig, ax = plt.subplots(figsize=(7, 7))
lf, = ax.plot(xf, yf, lw=1)
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3)
ax.set_axis_off()
plt.show(block=False)

fwhm = 20

byte_format = '%dB' % (chunk * 2)
while stream.is_active():
    raw = stream.read(chunk)
    byte_values = struct.unpack(byte_format, raw)
    # (A Savitzky-Golay / box-filter smoothing step was tried here and is
    # left disabled.)
    magnitude = np.abs(np.fft.fft(detrend(byte_values)))
    y_vals = magnitude / (256 * chunk)
    # Emphasize every bin above the mean by a factor of 4.
    y_vals[y_vals > np.mean(y_vals)] *= 4
    # Push each point of the circle outwards along its own radius.
    lf.set_data(xf + y_vals * np.cos(t), yf + y_vals * np.sin(t))

    canvas = ax.figure.canvas
    try:
        canvas.draw()
        canvas.flush_events()
    except TclError:
        # The window was closed: release the stream and stop.
        stream.stop_stream()
        stream.close()
        break

  运行截图:

  2019-11-28 16:47:13 更新,3D 版:

from _tkinter import TclError
import pyaudio
import numpy as np
import matplotlib.pyplot as plt
import struct
from scipy.signal import savgol_filter, detrend, lfilter


# Microphone capture settings: mono, 48 kHz, 2048 frames per read.
channels = 1
rate = 48000
chunk = 1024 * 2

p = pyaudio.PyAudio()
stream = p.open(
    format=pyaudio.paInt16,
    channels=channels,
    rate=rate,
    input=True,
    # output=True,
    frames_per_buffer=chunk
)

stream.start_stream()
# Base circle of radius R in the x-y plane, one point per byte of a read.
R = 20
t = np.linspace(0, 2*np.pi, chunk * 2)
xf = R*np.cos(t)
yf = R*np.sin(t)
# fig, ax = plt.subplots(figsize=(14, 5))
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current Matplotlib
# releases, so the 3D axes are created explicitly.
ax = fig.add_subplot(111, projection='3d')
lf, = ax.plot(xf, yf, np.zeros(chunk * 2), lw=2)
# ax.set_xlim(20, rate / 2)
# ax.set_ylim(20, rate / 2)
ax.set_zlim(-0.2, 1.2)
ax.set_axis_off()
plt.show(block=False)

fwhm = 20

while stream.is_active():
    data = stream.read(chunk)
    # Two bytes per 16-bit mono frame; the raw bytes are what gets analysed.
    data_int = struct.unpack(str(chunk * 2) + 'B', data)
    z_detrend = detrend(data_int)
    # z_smooth = savgol_filter(data_int, window_length=len(data_int) - 1, mode='valid')
    # box = np.ones(fwhm) / fwhm
    # z_smooth = np.convolve(z_detrend, box, mode='valid')
    zf = np.abs(np.fft.fft(z_detrend))
    z_vals = zf / (256 * chunk)
    # Emphasize every bin above the mean by a factor of 4.
    ind = np.where(z_vals > np.mean(z_vals))
    z_vals[ind[0]] *= 4
    # The circle stays fixed in x/y; the spectrum is drawn as its height.
    lf.set_xdata(xf)
    lf.set_ydata(yf)
    lf.set_3d_properties(z_vals)

    try:
        ax.figure.canvas.draw()
        ax.figure.canvas.flush_events()
    except TclError:
        # The window was closed: release the stream and stop.
        stream.stop_stream()
        stream.close()
        break

  运行截图:

  23:27:12 更新,为了更加突出重要的频率,我又加了一些代码:把大于平均值的那些振幅放大(代码中是乘以 4),主要代码如下:

# Excerpt: scale the first `chunk` spectrum values, then emphasize the
# dominant bins by multiplying every value above the mean by 4.
y_vals = yf[:chunk] / (256 * chunk)
ind = np.where(y_vals > np.mean(y_vals))
y_vals[ind[0]] *= 4
lf.set_ydata(y_vals)

  22:17:15 更新,因为 python 3.8 和 scipy 暂时不兼容,所以用不了scipy 对信号做平滑处理,不过网上找了一段代码,效率不错,这是链接,或者直接把下面代码加到合适的地方。

# Excerpt: moving-average (box) filter. The raw values are convolved with a
# uniform kernel of `width` points before the FFT is taken.
width = 20
box = np.ones(width) / width
y_smooth = np.convolve(data_int, box, mode='same')
yf = np.fft.fft(y_smooth)
...

  21:58:43 更新,测试了一下对原始信号做平滑处理,发现效率十分低,完全没法看效果,不过目前用的是自己实现的高斯滤波算法。。。

以下为原文

  这里的原理很简单:从音频输入设备(麦克风)读取数据,然后用 matplotlib 绘制出来(前提是驱动正常、麦克风可用);想要看到音乐的节奏振动,就再对数据做一次 fft。至于如何不断更新波形,matplotlib 提供了 animation 模块(见下面第二种方法),但实际用下来显示效果不如第一种(可能是我的用法不对)。之前用 matlab 做过,效果也很不错。

  第一种方法(波形显示更流畅,代码参考这个视频):

from _tkinter import TclError
import pyaudio
import numpy as np
import matplotlib.pyplot as plt
import struct


# Capture settings: mono, 44.1 kHz, 2048 frames per read.
channels = 1
rate = 44100
chunk = 1024 * 2

p = pyaudio.PyAudio()

# The stream is opened for both input and output; only reads are performed.
stream = p.open(
    format=pyaudio.paInt16,
    channels=channels,
    rate=rate,
    input=True,
    output=True,
    frames_per_buffer=chunk,
)
stream.start_stream()

# Spectrum line on a logarithmic x axis limited to 20 .. rate / 2.
xf = np.linspace(0, rate, chunk)
fig, ax = plt.subplots()
lf, = ax.semilogx(xf, np.zeros(chunk), '-', lw=1)
ax.set_xlim(20, rate / 2)
ax.set_ylim(0, 1)
plt.show(block=False)

# Two bytes per 16-bit mono frame.
byte_format = '%dB' % (chunk * 2)
while stream.is_active():
    data = stream.read(chunk)
    data_int = struct.unpack(byte_format, data)
    yf = np.fft.fft(data_int)
    magnitude = np.abs(yf[:chunk])
    lf.set_ydata(magnitude / (128 * chunk))

    canvas = ax.figure.canvas
    try:
        canvas.draw()
        canvas.flush_events()
    except TclError:
        # The window was closed: release the stream and stop.
        stream.stop_stream()
        stream.close()
        break

  第二种方法:

import pyaudio
import numpy as np
# from scipy.fftpack import fft
import matplotlib.pyplot as plt
import struct
from matplotlib.animation import FuncAnimation


# Capture settings: mono, 44.1 kHz, 2048 frames per read.
channels = 1
rate = 44100
chunk = 1024 * 2

p = pyaudio.PyAudio()
stream = p.open(
    format=pyaudio.paInt16,
    channels=channels,
    rate=rate,
    input=True,
    output=True,
)
stream.start_stream()

# x grids: even byte offsets for the waveform, 0 .. rate for the spectrum.
x = np.arange(0, 2*chunk, 2)
xf = np.linspace(0, rate, chunk)

# Top axes: waveform; bottom axes: spectrum on a logarithmic x axis.
fig, (ax1, ax2) = plt.subplots(2)
l, = ax1.plot(x, np.zeros(chunk), '-', lw=1)
lf, = ax2.semilogx(xf, np.zeros(chunk), '-', lw=1)

ax1.set_xlim(0, 2*chunk)
ax1.set_ylim(0, 255)
ax2.set_xlim(20, rate / 2)
ax2.set_ylim(0, 1)
ax1.set_xticks([0, chunk, 2 * chunk])
ax1.set_yticks([0, 128, 255])


def gen():
    """Yield (waveform, spectrum) pairs read from the stream while it is active.

    Yields:
        A tuple ``(data_np, yf)``: ``data_np`` holds every second byte of the
        read, interpreted as signed and shifted by 128; ``yf`` is the complex
        FFT of all bytes of the read taken as unsigned values.
    """
    while stream.is_active():
        data = stream.read(chunk)
        # Unsigned byte view of the buffer (values 0..255) for the FFT.
        data_int = np.frombuffer(data, dtype=np.uint8)
        # Reinterpret the same bytes as signed directly. Building an int8
        # array from Python ints above 127 raises OverflowError on NumPy 2.
        # Widen to int16 before adding 128 so the sum cannot overflow.
        # NOTE(review): on a little-endian host every second byte is
        # presumably the low byte of each 16-bit sample - confirm intent.
        signed_bytes = np.frombuffer(data, dtype=np.int8)
        data_np = signed_bytes[::2].astype(np.int16) + 128
        yf = np.fft.fft(data_int)
        yield data_np, yf


def init():
    """Seed the spectrum line with random values and return it for blitting."""
    initial_values = np.random.rand(chunk)
    lf.set_ydata(initial_values)
    return (lf,)


def update(data):
    """Apply one (waveform, spectrum) pair from ``gen()`` to the two lines.

    Args:
        data: Pair whose first item is the waveform values for the top axes
            and whose second item is the complex FFT for the bottom axes.

    Returns:
        Both modified line artists, so that blitting redraws each of them.
    """
    # No explicit canvas.draw() here: both axes belong to the same figure, so
    # the former two calls redrew everything twice per frame, and they ran
    # before the new data was set. Returning every modified artist lets the
    # animation's blitting do the redraw.
    l.set_ydata(data[0])
    lf.set_ydata(np.abs(data[1][:chunk]) / (128 * chunk))
    return l, lf,


# gen() feeds update() with data; the animation is not repeated once the
# generator is exhausted.
animate = FuncAnimation(
    fig,
    update,
    gen,
    blit=True,
    interval=0,
    repeat=False,
    init_func=init,
)
plt.show()

# Reached after the figure window is closed: release the audio stream.
stream.stop_stream()
stream.close()

  


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM