Troubles performing remote audio processing with NAO
Posted: 2019-06-28 18:01:34

[Question] I am trying to perform some remote audio processing with a NAO v4 using version 2.1.4.13 of the Python naoqi SDK, but I am having trouble doing so. I tried the solution provided by Alexandre Mazel in NAO robot remote audio problems, but I am still unable to retrieve the audio data from the buffer. The code referenced in that post is included below. I also followed the instructions for processing audio data remotely in https://www.generationrobots.com/media/NAO%20Next%20Gen/FeaturePaper(AudiosignalProcessing)%20(1).pdf
I would greatly appreciate any help or ideas, as I have been stuck on this problem for several days.
# -*- coding: utf-8 -*-
###########################################################
# Retrieve robot audio buffer
# Syntaxe:
# python scriptname --pip <ip> --pport <port>
#
# --pip <ip>: specify the ip of your robot (without specification it will use the NAO_IP defined some line below
#
# Author: Alexandre Mazel
###########################################################
NAO_IP = "10.0.252.126" # Romeo on table
#~ NAO_IP = "10.0.253.99" # Nao Alex Blue
from optparse import OptionParser
import naoqi
import numpy as np
import time
import sys
class SoundReceiverModule(naoqi.ALModule):
    """
    Use this object to get call back from the ALMemory of the naoqi world.
    Your callback needs to be a method with two parameter (variable name, value).
    """

    def __init__( self, strModuleName, strNaoIp ):
        try:
            naoqi.ALModule.__init__(self, strModuleName );
            self.BIND_PYTHON( self.getName(),"callback" );
            self.strNaoIp = strNaoIp;
            self.outfile = None;
            self.aOutfile = [None]*(4-1); # ASSUME max nbr channels = 4
        except BaseException, err:
            print( "ERR: abcdk.naoqitools.SoundReceiverModule: loading error: %s" % str(err) );
    # __init__ - end

    def __del__( self ):
        print( "INF: abcdk.SoundReceiverModule.__del__: cleaning everything" );
        self.stop();

    def start( self ):
        audio = naoqi.ALProxy( "ALAudioDevice", self.strNaoIp, 9559 );
        nNbrChannelFlag = 0; # ALL_Channels: 0, AL::LEFTCHANNEL: 1, AL::RIGHTCHANNEL: 2; AL::FRONTCHANNEL: 3 or AL::REARCHANNEL: 4.
        nDeinterleave = 0;
        nSampleRate = 48000;
        audio.setClientPreferences( self.getName(), nSampleRate, nNbrChannelFlag, nDeinterleave ); # setting same as default generate a bug !?!
        audio.subscribe( self.getName() );
        print( "INF: SoundReceiver: started!" );
        # self.processRemote( 4, 128, [18,0], "A"*128*4*2 ); # for local test
        # on romeo, here's the current order:
        # 0: right; 1: rear; 2: left; 3: front,

    def stop( self ):
        print( "INF: SoundReceiver: stopping..." );
        audio = naoqi.ALProxy( "ALAudioDevice", self.strNaoIp, 9559 );
        audio.unsubscribe( self.getName() );
        print( "INF: SoundReceiver: stopped!" );
        if( self.outfile != None ):
            self.outfile.close();

    def processRemote( self, nbOfChannels, nbrOfSamplesByChannel, aTimeStamp, buffer ):
        """
        This is THE method that receives all the sound buffers from the "ALAudioDevice" module
        """
        #~ print( "process!" );
        #~ print( "processRemote: %s, %s, %s, lendata: %s, data0: %s (0x%x), data1: %s (0x%x)" % (nbOfChannels, nbrOfSamplesByChannel, aTimeStamp, len(buffer), buffer[0],ord(buffer[0]),buffer[1],ord(buffer[1])) );
        #~ print( "raw data: " ),
        #~ for i in range( 8 ):
        #~     print( "%s (0x%x), " % (buffer[i],ord(buffer[i])) ),
        #~ print( "" );
        aSoundDataInterlaced = np.fromstring( str(buffer), dtype=np.int16 );
        #~ print( "len data: %s " % len( aSoundDataInterlaced ) );
        #~ print( "data interlaced: " ),
        #~ for i in range( 8 ):
        #~     print( "%d, " % (aSoundDataInterlaced[i]) ),
        #~ print( "" );
        aSoundData = np.reshape( aSoundDataInterlaced, (nbOfChannels, nbrOfSamplesByChannel), 'F' );
        #~ print( "len data: %s " % len( aSoundData ) );
        #~ print( "len data 0: %s " % len( aSoundData[0] ) );

        if( False ):
            # compute average
            aAvgValue = np.mean( aSoundData, axis = 1 );
            print( "avg: %s" % aAvgValue );
        if( False ):
            # compute fft
            nBlockSize = nbrOfSamplesByChannel;
            signal = aSoundData[0] * np.hanning( nBlockSize );
            aFft = ( np.fft.rfft(signal) / nBlockSize );
            print aFft;
        if( False ):
            # compute peak
            aPeakValue = np.max( aSoundData );
            if( aPeakValue > 16000 ):
                print( "Peak: %s" % aPeakValue );
        if( True ):
            bSaveAll = True;
            # save to file
            if( self.outfile == None ):
                strFilenameOut = "/out.raw";
                print( "INF: Writing sound to '%s'" % strFilenameOut );
                self.outfile = open( strFilenameOut, "wb" );
                if( bSaveAll ):
                    for nNumChannel in range( 1, nbOfChannels ):
                        strFilenameOutChan = strFilenameOut.replace(".raw", "_%d.raw"%nNumChannel);
                        self.aOutfile[nNumChannel-1] = open( strFilenameOutChan, "wb" );
                        print( "INF: Writing other channel sound to '%s'" % strFilenameOutChan );
            #~ aSoundDataInterlaced.tofile( self.outfile ); # wrote the 4 channels
            aSoundData[0].tofile( self.outfile ); # wrote only one channel
            #~ print( "aTimeStamp: %s" % aTimeStamp );
            #~ print( "data wrotten: " ),
            #~ for i in range( 8 ):
            #~     print( "%d, " % (aSoundData[0][i]) ),
            #~ print( "" );
            #~ self.stop(); # make naoqi crashes
            if( bSaveAll ):
                for nNumChannel in range( 1, nbOfChannels ):
                    aSoundData[nNumChannel].tofile( self.aOutfile[nNumChannel-1] );
    # processRemote - end

    def version( self ):
        return "0.6";

# SoundReceiver - end


def main():
    """ Main entry point
    """
    parser = OptionParser()
    parser.add_option("--pip",
                      help="Parent broker port. The IP address or your robot",
                      dest="pip")
    parser.add_option("--pport",
                      help="Parent broker port. The port NAOqi is listening to",
                      dest="pport",
                      type="int")
    parser.set_defaults(
        pip=NAO_IP,
        pport=9559)

    (opts, args_) = parser.parse_args()
    pip = opts.pip
    pport = opts.pport

    # We need this broker to be able to construct
    # NAOqi modules and subscribe to other modules
    # The broker must stay alive until the program exists
    myBroker = naoqi.ALBroker("myBroker",
                              "0.0.0.0",  # listen to anyone
                              0,          # find a free port and use it
                              pip,        # parent broker IP
                              pport)      # parent broker port

    # Warning: SoundReceiver must be a global variable
    # The name given to the constructor must be the name of the
    # variable
    global SoundReceiver
    SoundReceiver = SoundReceiverModule("SoundReceiver", pip)
    SoundReceiver.start()

    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print
        print "Interrupted by user, shutting down"
        myBroker.shutdown()
        sys.exit(0)


if __name__ == "__main__":
    main()
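A quick way to check whether anything is actually being captured is to wrap the raw dump written by processRemote into a WAV container. Below is a minimal sketch using the standard wave module, assuming the single-channel out.raw produced by the script above (48000 Hz, int16 samples); the path and sample rate are taken from that script and should be adjusted if the configuration differs:

# Minimal sketch: wrap the raw int16 mono dump written by processRemote into a WAV file
# so it can be played back. Assumes out.raw was produced by the script above.
import wave

RAW_PATH = "out.raw"        # raw dump written by the script (path as configured there)
WAV_PATH = "out.wav"
SAMPLE_RATE = 48000         # nSampleRate requested in SoundReceiverModule.start()

with open(RAW_PATH, "rb") as f:
    raw = f.read()

wav = wave.open(WAV_PATH, "wb")
wav.setnchannels(1)         # only aSoundData[0] is written to out.raw
wav.setsampwidth(2)         # 16-bit samples
wav.setframerate(SAMPLE_RATE)
wav.writeframes(raw)
wav.close()

# If processRemote never fires, out.raw stays empty (or is never created) and the WAV is silent.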
[Comments]
"But I am still unable to retrieve the audio data from the buffer." Is an error being thrown? Is the buffer empty? Can you access the buffer at all? What exactly is the problem you are having?
Hi Andrew, no error is thrown. According to the NAOqi documentation, the ALAudioDevice module should deliver the audio data to the buffer through the processRemote function. However, it does not seem to be passing that data to the function. I can tell because when I try to convert the raw data to a WAV file, the resulting WAV file is empty.

[Answer 1]
Perhaps this project can help point you in the right direction:
https://github.com/UNSWComputing/rUNSWift-2015-release/wiki/Whistle-Detection
It provides a whistle_detector.py Python module that also runs on the Nao under the 2.1 toolchain / NAO V4.
This project is also worth a look:
https://www.ibm.com/blogs/watson/2016/07/getting-robots-listen-using-watsons-speech-text-service/
One of the approaches it uses is to call the Linux command arecord instead of using ALAudioDevice.
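If you want to drive that arecord approach from a separate development machine, here is a minimal sketch of the idea; the robot address and credentials are assumptions, and it simply runs arecord on the robot over SSH with paramiko and copies the resulting WAV file back, without going through any NAOqi API:

# Minimal sketch (assumptions: robot reachable over SSH with the default "nao" account,
# paramiko installed on the development machine). Records on the robot, then copies the file back.
import paramiko

ROBOT_IP = "10.0.252.126"        # hypothetical robot address, adjust to yours
USER, PASSWORD = "nao", "nao"    # default credentials, change if you altered them

client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect(ROBOT_IP, username=USER, password=PASSWORD)

# arecord runs on the robot's Linux, so the OS of the development machine does not matter.
# -d 5: record 5 seconds, -r 16000: 16 kHz, -c 4: four microphone channels (adjust if the
# sound device does not expose all four), -f S16_LE: 16-bit little-endian samples.
stdin, stdout, stderr = client.exec_command(
    "arecord -d 5 -r 16000 -c 4 -f S16_LE /home/nao/recording.wav")
stdout.channel.recv_exit_status()   # wait until the recording is finished

# Copy the recording back to the development machine (works from Windows as well).
sftp = client.open_sftp()
sftp.get("/home/nao/recording.wav", "recording.wav")
sftp.close()
client.close()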
[Comments]
Thank you very much, that should be enough to point me in the right direction. I should mention that I am using Windows; is there anything similar to arecord for Windows?
You are welcome! The script from the second project is executed on the robot, which runs Linux, so the arecord command is also executed on the robot's Linux. The operating system of your development machine does not matter.

[Answer 2]
Have you tried the example from the documentation?
Try running this on the robot:
#! /usr/bin/env python
# -*- encoding: UTF-8 -*-
"""Example: Get Signal from Front Microphone & Calculate its rms Power"""
import qi
import argparse
import sys
import time
import numpy as np
class SoundProcessingModule(object):
    """
    A simple get signal from the front microphone of Nao & calculate its rms power.
    It requires numpy.
    """

    def __init__( self, app):
        """
        Initialise services and variables.
        """
        super(SoundProcessingModule, self).__init__()
        app.start()
        session = app.session

        # Get the service ALAudioDevice.
        self.audio_service = session.service("ALAudioDevice")
        self.isProcessingDone = False
        self.nbOfFramesToProcess = 20
        self.framesCount=0
        self.micFront = []
        self.module_name = "SoundProcessingModule"

    def startProcessing(self):
        """
        Start processing
        """
        # ask for the front microphone signal sampled at 16kHz
        # if you want the 4 channels call setClientPreferences(self.module_name, 48000, 0, 0)
        self.audio_service.setClientPreferences(self.module_name, 16000, 3, 0)
        self.audio_service.subscribe(self.module_name)

        while self.isProcessingDone == False:
            time.sleep(1)

        self.audio_service.unsubscribe(self.module_name)

    def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer):
        """
        Compute RMS from mic.
        """
        self.framesCount = self.framesCount + 1

        if (self.framesCount <= self.nbOfFramesToProcess):
            # convert inputBuffer to signed integer as it is interpreted as a string by python
            self.micFront=self.convertStr2SignedInt(inputBuffer)
            #compute the rms level on front mic
            rmsMicFront = self.calcRMSLevel(self.micFront)
            print "rms level mic front = " + str(rmsMicFront)
        else :
            self.isProcessingDone=True

    def calcRMSLevel(self,data) :
        """
        Calculate RMS level
        """
        rms = 20 * np.log10( np.sqrt( np.sum( np.power(data,2) / len(data) )))
        return rms

    def convertStr2SignedInt(self, data) :
        """
        This function takes a string containing 16 bits little endian sound
        samples as input and returns a vector containing the 16 bits sound
        samples values converted between -1 and 1.
        """
        signedData=[]
        ind=0;
        for i in range (0,len(data)/2) :
            signedData.append(data[ind]+data[ind+1]*256)
            ind=ind+2

        for i in range (0,len(signedData)) :
            if signedData[i]>=32768 :
                signedData[i]=signedData[i]-65536

        for i in range (0,len(signedData)) :
            signedData[i]=signedData[i]/32768.0

        return signedData


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--ip", type=str, default="127.0.0.1",
                        help="Robot IP address. On robot or Local Naoqi: use '127.0.0.1'.")
    parser.add_argument("--port", type=int, default=9559,
                        help="Naoqi port number")

    args = parser.parse_args()
    try:
        # Initialize qi framework.
        connection_url = "tcp://" + args.ip + ":" + str(args.port)
        app = qi.Application(["SoundProcessingModule", "--qi-url=" + connection_url])
    except RuntimeError:
        print ("Can't connect to Naoqi at ip \"" + args.ip + "\" on port " + str(args.port) +".\n"
               "Please check your script arguments. Run with -h option for help.")
        sys.exit(1)
    MySoundProcessingModule = SoundProcessingModule(app)
    app.session.registerService("SoundProcessingModule", MySoundProcessingModule)
    MySoundProcessingModule.startProcessing()
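For reference, the script parses --ip and --port with argparse (defaults 127.0.0.1 and 9559), so a typical invocation on the robot itself, assuming the file is saved as sound_processing.py, would be:

python sound_processing.py --ip 127.0.0.1 --port 9559

When running it from a remote machine instead, pass the robot's IP address with --ip, as described in the script's own help text.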
[Comments]
Yes, I have tried using this example. However, it does not work, because I am using an older version of the Python naoqi SDK that is compatible with the NAO robot I am working with.