在 Python 中检测并录制音频为 PCM 格式

Posted 2023-02-25

技术标签:

【中文标题】：在 Python 中检测并录制音频为 PCM 格式 【英文标题】：Detect & Record Audio to PCM format in Python 【发布时间】：2020-05-27 04:34:29 【问题描述】：

我需要在树莓派上录音：每当检测到声音（超过某个阈值）时，开始以 PCM 文件格式录制音频，并在静音时停止。为此，我找到了一个之前提出的问题“Detect & Record Audio in Python”，并采用了其中的最佳答案。但是，该答案以 WAV 格式保存文件，而我使用的 API 只能读取 pcm、speex 或 speex-wb 格式。

如何才能以 pcm 格式录制音频，同时保留“仅在检测到声音时才录制”的功能？

from sys import byteorder
from array import array
from struct import pack

import pyaudio
import wave

# Amplitude level that separates "sound" from "silence"; compared against
# sample values by is_silent() and trim().
THRESHOLD = 8000
chans = 1  # channel count used for both the input stream and the WAV header
chunk = 4096  # frames requested per stream.read() call / frames_per_buffer
form_1 = pyaudio.paInt16  # sample format passed to PyAudio when opening the stream
samp_rate = 44100  # sampling rate used for the stream, the padding and the WAV header
dev_index = 2 # input device index found by p.get_device_info_by_index(ii); hardware-specific
wav_output_filename = 'test4.wav'  # path written when the file is run as a script

def is_silent(snd_data, threshold=None):
    """Return True when every sample in ``snd_data`` is below the threshold.

    The comparison uses the absolute value of each sample, so a loud
    negative-going peak counts as sound just like a positive one (this
    matches the ``abs()`` test used by ``trim``).

    Args:
        snd_data: Iterable of integer samples.
        threshold: Amplitude at or above which a sample counts as sound.
            Defaults to the module-level ``THRESHOLD``.

    Returns:
        True if the peak absolute amplitude is below ``threshold``. An
        empty chunk is treated as silent.
    """
    if threshold is None:
        threshold = THRESHOLD
    # default=0 keeps an empty chunk from raising ValueError in max().
    peak = max((abs(sample) for sample in snd_data), default=0)
    return peak < threshold

def normalize(snd_data):
    """Scale the samples so the loudest one has absolute value 16384.

    Args:
        snd_data: Iterable of signed 16-bit integer samples.

    Returns:
        A new ``array('h')`` with every sample multiplied by
        ``16384 / peak`` and truncated toward zero by ``int()``. Empty or
        all-zero input has no peak to scale against and is returned as an
        unscaled copy instead of raising.
    """
    MAXIMUM = 16384
    peak = max((abs(sample) for sample in snd_data), default=0)
    if peak == 0:
        # Nothing to amplify; avoid dividing by zero.
        return array('h', snd_data)

    times = float(MAXIMUM) / peak
    return array('h', (int(sample * times) for sample in snd_data))

def trim(snd_data, threshold=None):
    """Strip quiet samples from the start and end of ``snd_data``.

    A sample is "loud" when its absolute value is strictly greater than
    the threshold. Everything before the first loud sample and after the
    last loud sample is dropped; samples in between are kept untouched.

    Args:
        snd_data: Iterable of signed 16-bit integer samples. It is not
            modified.
        threshold: Amplitude a sample must exceed to count as loud.
            Defaults to the module-level ``THRESHOLD``.

    Returns:
        A new ``array('h')`` spanning the first through the last loud
        sample, or an empty array when no sample exceeds the threshold.
    """
    if threshold is None:
        threshold = THRESHOLD

    # Work on a private copy so indexing works for any iterable input and
    # the caller's buffer is never touched.
    samples = array('h', snd_data)

    first = next(
        (i for i, sample in enumerate(samples) if abs(sample) > threshold),
        None,
    )
    if first is None:
        return array('h')

    # A loud sample exists at ``first``, so this backwards scan always finds
    # one (possibly the same index).
    last = next(
        i for i in range(len(samples) - 1, -1, -1)
        if abs(samples[i]) > threshold
    )
    return samples[first:last + 1]

def add_silence(snd_data, seconds, rate=None):
    """Pad ``snd_data`` with zero-valued samples at both ends.

    Args:
        snd_data: Samples to pad (an ``array('h')`` or an iterable of
            ints that fit in a signed 16-bit value).
        seconds: Length of each padding run in seconds (float allowed).
        rate: Samples per second used to convert ``seconds`` to a sample
            count. Defaults to the module-level ``samp_rate``.

    Returns:
        A new ``array('h')``: ``int(seconds * rate)`` zeros, the original
        samples, then the same number of zeros again.
    """
    if rate is None:
        rate = samp_rate

    # Build the padding directly as an array instead of a temporary list of
    # Python ints; a non-positive count yields an empty array.
    silence = array('h', [0]) * int(seconds * rate)

    r = array('h', silence)
    r.extend(snd_data)
    r.extend(silence)
    return r

def record():
    """
    Record a word or words from the microphone and
    return the data as an array of signed shorts.

    Reading continues until sound has been detected and more than 30
    silent chunks have been seen afterwards. The captured audio is then
    normalized, trimmed of leading/trailing silence, and padded with
    0.5 seconds of blank sound to make sure VLC et al can play it
    without getting chopped off.

    Returns:
        A ``(sample_width, samples)`` tuple: the sample size reported by
        PyAudio for the configured format, and the processed
        ``array('h')`` of samples.
    """
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format=form_1, channels=chans, rate=samp_rate,
                            input_device_index=dev_index, input=True,
                            frames_per_buffer=chunk)
        try:
            num_silent = 0
            snd_started = False
            r = array('h')

            while True:
                # The raw bytes are reinterpreted as signed shorts; on a
                # big-endian host they are byte-swapped right after.
                snd_data = array(
                    'h', stream.read(chunk, exception_on_overflow=False))
                if byteorder == 'big':
                    snd_data.byteswap()
                r.extend(snd_data)

                silent = is_silent(snd_data)

                # NOTE(review): num_silent is cumulative -- it is never reset
                # when sound resumes, so pauses inside a phrase also count
                # toward the limit. Confirm whether "consecutive" was meant.
                if silent and snd_started:
                    num_silent += 1
                elif not silent and not snd_started:
                    snd_started = True

                if snd_started and num_silent > 30:
                    break
        finally:
            # Release the stream even if opening succeeded but reading failed.
            stream.stop_stream()
            stream.close()

        sample_width = audio.get_sample_size(form_1)
    finally:
        # Always release the PyAudio instance, including when open() raises.
        audio.terminate()

    r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r

def record_to_file(path):
    """Record from the microphone and write the result to ``path`` as WAV.

    Args:
        path: Destination file name; opened for binary writing by the
            ``wave`` module.

    The samples returned by ``record()`` are packed as little-endian
    signed 16-bit values and written with the module's channel count and
    sample rate.
    """
    sample_width, data = record()
    # A repeat count ('<Nh') avoids building a format string with one
    # character per sample.
    frames = pack('<%dh' % len(data), *data)

    # The context manager closes the file even if a header/setter call or
    # the write raises.
    with wave.open(path, 'wb') as wf:
        wf.setnchannels(chans)
        wf.setsampwidth(sample_width)
        wf.setframerate(samp_rate)
        wf.writeframes(frames)

if __name__ == '__main__':
    # Script entry point: prompt the user, capture one utterance, and report
    # where the recording was saved.
    output_path = wav_output_filename
    print("please speak a word into the microphone")
    record_to_file(output_path)
    print("done - result written to " + output_path)

【问题讨论】：

【参考方案1】：

我没能找到把音频保存为 pcm 格式的任何解决方案，但我发现可以用 16k 的采样率处理 wav 格式的音频，所以我就先保持现状了。

【讨论】：

以上是关于“在 Python 中检测并录制音频为 PCM 格式”的主要内容，如果未能解决你的问题，请参考以下文章。