AudioEffector 使用方法
作者: Moto Hira
本教程展示了如何使用 torchaudio.io.AudioEffector
对波形张量应用各种效果和编解码器。
本教程需要 FFmpeg 库。有关详细信息,请参阅 FFmpeg 依赖。
概述
AudioEffector
结合了由 StreamWriter
和 StreamReader
提供的内存编码、解码和过滤功能。
下图展示了该过程。
import torch
import torchaudio
# Print the installed torch and torchaudio versions so the run is reproducible.
print(torch.__version__)
print(torchaudio.__version__)
2.6.0
2.6.0
from torchaudio.io import AudioEffector, CodecConfig
import matplotlib.pyplot as plt
from IPython.display import Audio
# List each FFmpeg library reported by torchaudio together with its version.
ffmpeg_versions = torchaudio.utils.ffmpeg_utils.get_versions()
for lib_name, lib_version in ffmpeg_versions.items():
    print(lib_name, lib_version)
libavcodec (60, 3, 100)
libavdevice (60, 1, 100)
libavfilter (9, 3, 100)
libavformat (60, 3, 100)
libavutil (58, 2, 100)
用法
要使用 AudioEffector
,请使用 effect
和 format
实例化它,然后将波形传递给 apply()
或 stream()
方法。
# Build the effector; `...` is a placeholder for the effect and format you choose.
effector = AudioEffector(effect=..., format=...,)
# Apply at once: the whole waveform is processed in a single call.
applied = effector.apply(waveform, sample_rate)
apply
方法将效果和编解码器一次性应用于整个波形。因此,如果输入波形较长,且内存消耗是一个问题,可以使用 stream
方法逐块处理。
# Apply chunk by chunk: iterate over the processed chunks instead of
# holding the whole result in memory at once.
# NOTE(review): the torchaudio API reference appears to list a required
# `frames_per_chunk` argument for stream() - confirm and pass it here.
for applied_chunk in effector.stream(waveform, sample_rate):
    ...
示例
# Fetch the sample recording used by every example below.
src = torchaudio.utils.download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
# channels_first=False puts the channel axis last; the helpers below index channels on dim 1.
waveform, sr = torchaudio.load(src, channels_first=False)
图库
def show(effect, *, stereo=False):
    """Apply ``effect`` to the sample waveform, plot it and return a player.

    Args:
        effect: Value forwarded as ``AudioEffector(effect=...)``; callers in
            this file pass FFmpeg filter strings or ``None``.
        stereo: When true, the waveform is concatenated with itself along
            dim 1 so the effect receives two channels.

    Returns:
        An ``Audio`` widget built from the processed waveform.
    """
    if stereo:
        source = torch.cat([waveform] * 2, dim=1)
        fig_height = 2.1
    else:
        source = waveform
        fig_height = 1.2

    processed = AudioEffector(effect=effect, pad_end=False).apply(source, int(sr))

    channel_count = processed.size(1)
    fig, axes = plt.subplots(
        channel_count,
        1,
        squeeze=False,
        figsize=(6.4, fig_height),
        sharex=True,
    )
    # One spectrogram row per output channel.
    for idx, row in enumerate(axes):
        row[0].specgram(processed[:, idx], Fs=sr)
    fig.set_tight_layout(True)
    return Audio(processed.numpy().T, rate=sr)
原始音频
show(effect=None)
效果
速度 (tempo)
https://ffmpeg.org/ffmpeg-filters.html#atempo
# Tempo factor below 1 slows the audio down; above 1 speeds it up.
show("atempo=0.7")
show("atempo=1.8")
高通滤波器
https://ffmpeg.org/ffmpeg-filters.html#highpass
show("highpass=frequency=1500")
低通滤波器
https://ffmpeg.org/ffmpeg-filters.html#lowpass
show("lowpass=frequency=1000")
全通滤波器
https://ffmpeg.org/ffmpeg-filters.html#allpass
show("allpass")
带通滤波器
https://ffmpeg.org/ffmpeg-filters.html#bandpass
show("bandpass=frequency=3000")
带阻滤波器
https://ffmpeg.org/ffmpeg-filters.html#bandreject
show("bandreject=frequency=3000")
回声
https://ffmpeg.org/ffmpeg-filters.html#aecho
# Same echo filter with increasing delays (6, 60, 1000); per the FFmpeg
# aecho documentation the delays are given in milliseconds.
show("aecho=in_gain=0.8:out_gain=0.88:delays=6:decays=0.4")
show("aecho=in_gain=0.8:out_gain=0.88:delays=60:decays=0.4")
show("aecho=in_gain=0.8:out_gain=0.9:delays=1000:decays=0.3")
合唱效果
https://ffmpeg.org/ffmpeg-filters.html#chorus
show("chorus=0.5:0.9:50|60|40:0.4|0.32|0.3:0.25|0.4|0.3:2|2.3|1.3")
FFT 滤波器
https://ffmpeg.org/ffmpeg-filters.html#afftfilt
# fmt: off
# Scale both the real and imaginary part of each FFT bin by
# 1 - clip(b * (b/nb), 0, 1), so the gain shrinks as the bin index b grows
# (b = bin index, nb = number of bins, per the FFmpeg afftfilt documentation).
show(
"afftfilt="
"real='re * (1-clip(b * (b/nb), 0, 1))':"
"imag='im * (1-clip(b * (b/nb), 0, 1))'"
)
# Keep each bin's magnitude, hypot(re,im), but force a constant phase:
# sin(0) zeroes the real part and cos(0) passes the magnitude to the
# imaginary part (listed as the "robotize" example in the FFmpeg docs).
show(
"afftfilt="
"real='hypot(re,im) * sin(0)':"
"imag='hypot(re,im) * cos(0)':"
"win_size=512:"
"overlap=0.75"
)
# Keep each bin's magnitude but multiply it by cos/sin of angles drawn from
# random() (listed as the "whisper" example in the FFmpeg docs).
show(
"afftfilt="
"real='hypot(re,im) * cos(2 * 3.14 * (random(0) * 2-1))':"
"imag='hypot(re,im) * sin(2 * 3.14 * (random(1) * 2-1))':"
"win_size=128:"
"overlap=0.8"
)
# fmt: on
颤音效果
https://ffmpeg.org/ffmpeg-filters.html#vibrato
show("vibrato=f=10:d=0.8")
/pytorch/audio/ci_env/lib/python3.10/site-packages/IPython/lib/display.py:188: RuntimeWarning: invalid value encountered in cast
return scaled.astype("<h").tobytes(), nchan
震音效果
https://ffmpeg.org/ffmpeg-filters.html#tremolo
show("tremolo=f=8:d=0.8")
晶体化器
https://ffmpeg.org/ffmpeg-filters.html#crystalizer
show("crystalizer")
镶边效果
https://ffmpeg.org/ffmpeg-filters.html#flanger
show("flanger")
相位器
https://ffmpeg.org/ffmpeg-filters.html#aphaser
show("aphaser")
脉冲器
https://ffmpeg.org/ffmpeg-filters.html#apulsator
show("apulsator", stereo=True)
haas
https://ffmpeg.org/ffmpeg-filters.html#haas
show("haas")
编解码器
def show_multi(configs):
    """Process the sample waveform with each config and plot the results.

    Args:
        configs: Iterable of keyword-argument dicts; each one is expanded
            into ``AudioEffector(**config)``.

    Returns:
        A list of ``Audio`` widgets, one per config, in input order.
    """
    results = [AudioEffector(**config).apply(waveform, int(sr)) for config in configs]
    num_configs = len(results)
    figsize = (6.4, 0.3 + num_configs * 0.9)
    # squeeze=False keeps ``axes`` two-dimensional even for a single config;
    # without it plt.subplots returns a bare Axes that cannot be zipped.
    f, axes = plt.subplots(num_configs, 1, squeeze=False, figsize=figsize, sharex=True)
    for result, ax in zip(results, axes[:, 0]):
        # Only the first channel of each result is plotted.
        ax.specgram(result[:, 0], Fs=sr)
    f.set_tight_layout(True)
    return [Audio(r.numpy().T, rate=sr) for r in results]
ogg
# Encode the same waveform as ogg three times: once without naming an
# encoder, then with the vorbis and opus encoders named explicitly.
results = show_multi(
[
{"format": "ogg"},
{"format": "ogg", "encoder": "vorbis"},
{"format": "ogg", "encoder": "opus"},
]
)
ogg - 默认编码器 (flac)
results[0]
ogg - vorbis
results[1]
ogg - opus
results[2]
mp3
https://trac.ffmpeg.org/wiki/Encode/MP3
# Compare mp3 encodings of the same waveform: default settings first, then
# two compression_level values, two bit_rate values and two qscale values.
# The order here matches the results[0]..results[6] cells that follow.
results = show_multi(
[
{"format": "mp3"},
{"format": "mp3", "codec_config": CodecConfig(compression_level=1)},
{"format": "mp3", "codec_config": CodecConfig(compression_level=9)},
{"format": "mp3", "codec_config": CodecConfig(bit_rate=192_000)},
{"format": "mp3", "codec_config": CodecConfig(bit_rate=8_000)},
{"format": "mp3", "codec_config": CodecConfig(qscale=9)},
{"format": "mp3", "codec_config": CodecConfig(qscale=1)},
]
)
默认
results[0]
compression_level=1
results[1]
compression_level=9
results[2]
bit_rate=192k
results[3]
bit_rate=8k
results[4]
qscale=9
results[5]
qscale=1
results[6]
标签: torchaudio.io