有啥方法可以让 SoX 只打印 wav 文件中的幅度值?
Posted
技术标签:
【中文标题】有啥方法可以让 SoX 只打印 wav 文件中的幅度值?【英文标题】:Any way I can get SoX to just print the amplitude values from a wav file?有什么方法可以让 SoX 只打印 wav 文件中的幅度值? 【发布时间】:2017-03-01 20:37:52 【问题描述】:我正在使用一行简单的代码:
sox vocal2.wav -t dat vocal2.dat
通过 SoX 将 wav 文件转换为显示特定时间位置的幅度值的文本文件。输出数据还有一个标题,显示采样率和通道数。
我需要在 C++ 程序中使用振幅数据来确定输入的音高,但每次都必须先转换文件、把数据拖到 Excel 中、复制其中一列、再另存为新的 .txt 文件之后才能使用,这很烦人。
有没有办法让 SoX 只在转换后的文件中输出振幅?
【问题讨论】:
【参考方案1】:如果您想要专门用于 C++ 的数据,使用Libsndfile 之类的内容非常容易。这是一个相当成熟的 C 库,但带有一个方便的 C++ 包装器 (sndfile.hh)。
这里的示例用法取自我最近写的一篇文章,我需要轻松访问音频数据。
// Path of the WAV file to load.
std::string infile_name = "/path/to/vocal2.wav";

// Open input file.
SndfileHandle infile_handle( infile_name );
if( !infile_handle || infile_handle.error() != 0 )
{
    // The diagnostics and the early exit belong to the failure branch only;
    // without the braces the `return 1` would run on every call.
    std::cerr << "Unable to read " << infile_name << std::endl;
    std::cerr << infile_handle.strError() << std::endl;
    return 1;
}

// Show file stats
int64_t in_frames = infile_handle.frames();
int in_channels = infile_handle.channels();
int in_samplerate = infile_handle.samplerate();
std::cerr << "Input file: " << infile_name << std::endl;
std::cerr << " * Frames : " << std::setw(6) << in_frames << std::endl;
std::cerr << " * Channels : " << std::setw(6) << in_channels << std::endl;
std::cerr << " * Sample Rate : " << std::setw(6) << in_samplerate << std::endl;

// Read audio data as float: the buffer holds one value per channel per frame.
std::vector<float> in_data( in_frames * in_channels );
infile_handle.read( in_data.data(), in_data.size() );
如果你只是想在命令行上使用 SoX 并获得文本输出,你可以这样做:
sox vocal2.wav -t f32 - | od -ve -An | more
这里我指定输出为原始 32 位浮点数,并通过 GNU od 处理。od 不能指定每行输出多少列,这有点令人沮丧,但您可以使用其他简单的工具来整理输出。如果您想要不同的采样编码,请查看 od 的手册页。
【讨论】:
这太完美了,谢谢。你能分享完整的 C++ 代码吗?除了未定义的 verbose 之外,代码都能编译——verbose 到底是做什么用的? 我只是把它从我的命令行应用程序里直接搬了出来,以为它显然无关紧要。如果我把 -v 作为命令行参数传入,就会设置 verbose。我已经把它从我的答案中删除了。我真的认为没有必要贴出完整代码,因为它很长,而且与您的问题无关。
这很合理。我已经让 libsndfile 做到了我想做的事(选择一个 wav 文件并输出它的频率),只是在编程中我从来没有见过 verbose 这个词。谢谢!
@SzczepanHołyszewski 你当然有权这样做。请允许我强调,如果您更仔细地阅读该问题,您会发现他们希望在他们的 C++ 程序中获得 WAV 幅度,并且实际上发现进行任何类型的外部文件转换都很烦人。事实是,SoX 没有 提供任何功能来完成所要求的操作,因此我提供了一些更适合目的的简单替代方案。这可能不符合您对软件工程的理想化看法,但恐怕这就是现实。我们经常不得不使用出乎我们意料的解决方案。【参考方案2】:
一个 wav 文件由一个简单的标头和一长串 16 位 pcm 样本组成。
下面是一些用来读取 wav 文件的代码,不太确定它当时处于什么开发状态。
/*
 * Format codes for the format_tag field of a WAV. Only WAVE_FORMAT_PCM and
 * WAVE_FORMAT_IEEE_FLOAT are tested by the decoding code in this listing;
 * the remaining codes are defined but not referenced here.
 */
#define WAVE_FORMAT_PCM 0x01
#define WAVE_FORMAT_IEEE_FLOAT 0x03
#define WAVE_FORMAT_ALAW 0x06 /* 8-bit ITU-T G.711 A-law */
#define WAVE_FORMAT_MULAW 0x07 /* 8-bit ITU-T G.711 µ-law */
#define WAVE_FORMAT_EXTENSIBLE 0xFFFE
/*
 * In-memory description of a loaded WAV file.
 *
 * The loader in this file fills the structure from the "fmt " and "data"
 * chunks; killwav() releases both the structure and sound_buffer.
 */
typedef struct
{
    short format_tag;            /* format code (one of the WAVE_FORMAT_* values) */
    short channels;              /* channel count read from the "fmt " chunk */
    short block_align;           /* block alignment field of the "fmt " chunk */
    short bits_per_sample;       /* sample width in bits */
    unsigned long format_length; /* declared size of the "fmt " chunk */
    unsigned long sample_rate;   /* sample rate field of the "fmt " chunk */
    unsigned long avg_bytes_sec; /* average bytes-per-second field */
    unsigned long data_size;     /* declared size in bytes of the "data" chunk */
    unsigned char *sound_buffer; /* decoded sample storage, or 0 when none was read */
} WAV;
/* Loading and releasing a WAV. */
WAV *loadwav(const char* filename, int *err);
WAV *floadwav(FILE *fp, int *err);
void killwav(WAV *wav);

/* Per-chunk readers used while parsing the RIFF stream. */
static void readformatchunk(FILE *fp, WAV *wav, int *err);
static void readdatachunk(FILE *fp, WAV *wav, int *err);
static void readunknownchunk(FILE *fp, int *err);

/* Host-independent readers for IEEE 754 values and little-endian integers. */
static double freadieee754(FILE *fp, int bigendian);
static float freadieee754f(FILE *fp, int bigendian);
static int fget16le(FILE *fp);
static long fget32le(FILE *fp);

/*
 * Declaration only: no body for this function appears in this listing. It is
 * terminated here so that it no longer runs into the definition that follows.
 */
WAV *loadwav16stereo(const char *filename, int *err);
/*
 * Load a WAV file from disk.
 *   filename - path of the file, opened in binary mode
 *   err      - optional error return (may be null): 0 on success, -2 when
 *              the file cannot be opened, otherwise whatever floadwav() sets
 * Returns the loaded WAV (release it with killwav()), or 0 on failure.
 */
WAV *loadwav(const char* filename, int *err)
{
    WAV *answer;
    FILE *fp;

    if (err)
        *err = 0;

    fp = fopen(filename, "rb");
    if (!fp)
    {
        /* err is optional, so it must be checked here as well */
        if (err)
            *err = -2;
        return 0;
    }

    answer = floadwav(fp, err);
    fclose(fp);
    return answer;
}
WAV *floadwav(FILE *fp, int *err)
short format_tag, channels, block_align, bits_per_sample;
unsigned long format_length, sample_rate, avg_bytes_sec, i;
unsigned char *sound_buffer;
int data_size;
WAV *answer = 0;
unsigned char id[4];
unsigned long size;
if (err)
*err = 0;
answer = malloc(sizeof(WAV));
if (!answer)
goto out_of_memory;
answer->sound_buffer = 0;
fread(id, sizeof(unsigned char), 4, fp);
if (strncmp(id, "RIFF", 4))
goto parse_error;
size = fget32le(fp);
fread(id, sizeof(unsigned char), 4, fp);
if (strncmp(id, "WAVE", 4))
goto parse_error;
while (1)
if (fread(id, sizeof(unsigned char), 4, fp) != 4)
goto parse_error;
if (!strncmp(id, "fmt ", 4))
readformatchunk(fp, answer, err);
if (*err)
goto parse_error;
else if (!strncmp(id, "data", 4))
readdatachunk(fp, answer, err);
if (*err)
goto parse_error;
break;
else
return answer;
parse_error:
if (err)
*err = -3;
killwav(answer);
return 0;
out_of_memory:
if (err)
*err = -1;
killwav(answer);
return 0;
/*
 * Release a WAV together with its sample buffer.
 *   wav - object to destroy; a null pointer is accepted and ignored
 */
void killwav(WAV *wav)
{
    if (wav)
    {
        free(wav->sound_buffer);
        free(wav);
    }
}
/* Return the channels field of wav (must not be null). */
int wav_Nchannels(WAV *wav)
{
    return wav->channels;
}
/* Return the sample_rate field of wav (must not be null), narrowed to int. */
int wav_samplerate(WAV *wav)
{
    return (int)wav->sample_rate;
}
/*
 * Convert an already-scaled floating point sample to a 16-bit value,
 * saturating at the limits of short. Casting an out-of-range or NaN value
 * straight to short is undefined, so those cases are handled explicitly.
 */
static unsigned short wav_clamptoshort(double scaled)
{
    if (scaled != scaled) /* NaN carries no usable amplitude */
        return 0;
    if (scaled > SHRT_MAX)
        scaled = SHRT_MAX;
    else if (scaled < SHRT_MIN)
        scaled = SHRT_MIN;
    return (unsigned short)(short)scaled;
}

/*
 * Return the samples of wav converted to 16 bits.
 *   wav      - loaded WAV (must not be null)
 *   Nsamples - optional return for the number of values in the result
 * Returns a malloc()ed array the caller must free(), or 0 when the format is
 * not supported, no sample data is present, or memory runs out. *Nsamples is
 * 0 whenever 0 is returned.
 *
 * Supported inputs: 8 and 16 bit PCM, 32 and 64 bit IEEE float. The values
 * are signed 16-bit amplitudes stored in unsigned short elements.
 */
unsigned short *wav_samplesasshort(WAV *wav, long *Nsamples)
{
    unsigned short *answer;
    int is_pcm = (wav->format_tag == WAVE_FORMAT_PCM);
    int is_float = (wav->format_tag == WAVE_FORMAT_IEEE_FLOAT);
    int bits = wav->bits_per_sample;
    long N;
    long i;

    if (Nsamples)
        *Nsamples = 0;
    if (!wav->sound_buffer)
        return 0;

    if (is_pcm && bits == 8)
        N = (long)wav->data_size;
    else if (is_pcm && bits == 16)
        N = (long)(wav->data_size / 2);
    else if (is_float && bits == 32)
        N = (long)(wav->data_size / 4);
    else if (is_float && bits == 64)
        N = (long)(wav->data_size / 8);
    else
        return 0; /* unsupported sample format */

    /* never request zero bytes: malloc(0) may legitimately return null */
    answer = (unsigned short *)malloc((N ? (size_t)N : 1) * sizeof(unsigned short));
    if (!answer)
        return 0;

    if (is_pcm && bits == 8)
    {
        /* 8-bit PCM is unsigned with a midpoint of 128 */
        for (i = 0; i < N; i++)
            answer[i] = (unsigned short)((wav->sound_buffer[i] - 128) * 256);
    }
    else if (is_pcm)
    {
        const short *sbuffer = (const short *)wav->sound_buffer;

        for (i = 0; i < N; i++)
            answer[i] = (unsigned short)sbuffer[i];
    }
    else if (bits == 32)
    {
        const float *fbuffer = (const float *)wav->sound_buffer;

        for (i = 0; i < N; i++)
            answer[i] = wav_clamptoshort(fbuffer[i] * SHRT_MAX);
    }
    else
    {
        /* 64-bit samples are held as doubles, eight bytes apiece */
        const double *dbuffer = (const double *)wav->sound_buffer;

        for (i = 0; i < N; i++)
            answer[i] = wav_clamptoshort(dbuffer[i] * SHRT_MAX);
    }

    if (Nsamples)
        *Nsamples = N;
    return answer;
}
/*
 * Read the body of a "fmt " chunk (the four byte id has been consumed).
 *   fp  - stream positioned at the chunk size field
 *   wav - receives the format fields; left untouched on failure
 *   err - set to -3 on a malformed or truncated chunk, otherwise unchanged
 *
 * The chunk is consumed according to its declared length, including the pad
 * byte that follows an odd-sized RIFF chunk, so the stream ends up at the
 * next chunk id. For WAVE_FORMAT_EXTENSIBLE the effective format code is
 * taken from the first two bytes of the sub-format GUID.
 */
static void readformatchunk(FILE *fp, WAV *wav, int *err)
{
    unsigned long format_length;
    unsigned long consumed;
    unsigned long leftover;
    unsigned long sample_rate;
    unsigned long avg_bytes_sec;
    int format_tag;
    int channels;
    int block_align;
    int bits_per_sample;
    int cb_size;
    unsigned char guid[16];

    /* fget32le sign-extends; mask so the size stays a 32-bit quantity */
    format_length = (unsigned long)fget32le(fp) & 0xFFFFFFFFUL;
    if (format_length < 16)
    {
        *err = -3;
        return;
    }

    format_tag = fget16le(fp);
    channels = fget16le(fp);
    sample_rate = (unsigned long)fget32le(fp) & 0xFFFFFFFFUL;
    avg_bytes_sec = (unsigned long)fget32le(fp) & 0xFFFFFFFFUL;
    block_align = fget16le(fp);
    bits_per_sample = fget16le(fp);
    consumed = 16;

    if (format_length - consumed >= 2)
    {
        cb_size = fget16le(fp);
        consumed += 2;
        if (cb_size >= 22 && format_length - consumed >= 22)
        {
            fget16le(fp); /* valid bits per sample: not used */
            fget32le(fp); /* channel mask: not used */
            if (fread(guid, 1, sizeof guid, fp) != sizeof guid)
            {
                *err = -3;
                return;
            }
            consumed += 22;
            /* format_tag was sign-extended, so compare its low 16 bits */
            if ((format_tag & 0xFFFF) == WAVE_FORMAT_EXTENSIBLE)
                format_tag = guid[0] | (guid[1] << 8);
        }
    }

    /* skip whatever remains of the chunk, plus the pad byte if its size is odd */
    leftover = (format_length - consumed) + (format_length & 1UL);
    while (leftover--)
    {
        if (fgetc(fp) == EOF)
        {
            *err = -3;
            return;
        }
    }
    if (feof(fp) || ferror(fp))
    {
        *err = -3;
        return;
    }

    wav->format_tag = (short)format_tag;
    wav->channels = (short)channels;
    wav->block_align = (short)block_align;
    wav->bits_per_sample = (short)bits_per_sample;
    wav->format_length = format_length;
    wav->sample_rate = sample_rate;
    wav->avg_bytes_sec = avg_bytes_sec;
}
/*
 * Read the body of a "data" chunk (the four byte id has been consumed).
 *   fp  - stream positioned at the chunk size field
 *   wav - must already hold format_tag and bits_per_sample; receives
 *         data_size and, for supported formats, sound_buffer
 *   err - set to -1 when memory runs out and -3 when the stream ends early,
 *         otherwise unchanged
 *
 * Supported formats are 8/16 bit PCM and 32/64 bit IEEE float. For any other
 * format only data_size is recorded and no sample data is read. The buffer is
 * attached to wav as soon as it is allocated, so killwav() releases it even
 * when an error is reported.
 */
static void readdatachunk(FILE *fp, WAV *wav, int *err)
{
    unsigned long data_size;
    unsigned long count;
    unsigned long i;
    int is_pcm;
    int is_float;

    /* fget32le sign-extends; mask so the size stays a 32-bit quantity */
    data_size = (unsigned long)fget32le(fp) & 0xFFFFFFFFUL;
    wav->data_size = data_size;
    is_pcm = (wav->format_tag == WAVE_FORMAT_PCM);
    is_float = (wav->format_tag == WAVE_FORMAT_IEEE_FLOAT);

    if (is_pcm && wav->bits_per_sample == 8)
    {
        unsigned char *buff8;

        count = data_size;
        buff8 = (unsigned char *)malloc(count ? count : 1);
        if (!buff8)
        {
            *err = -1;
            return;
        }
        wav->sound_buffer = buff8;
        for (i = 0; i < count; i++)
            buff8[i] = (unsigned char)fgetc(fp);
    }
    else if (is_pcm && wav->bits_per_sample == 16)
    {
        short *buff16;

        count = data_size / 2;
        buff16 = (short *)malloc((count ? count : 1) * sizeof(short));
        if (!buff16)
        {
            *err = -1;
            return;
        }
        wav->sound_buffer = (unsigned char *)buff16;
        for (i = 0; i < count; i++)
            buff16[i] = (short)fget16le(fp);
    }
    else if (is_float && wav->bits_per_sample == 32)
    {
        float *bufffloat;

        count = data_size / 4;
        bufffloat = (float *)malloc((count ? count : 1) * sizeof(float));
        if (!bufffloat)
        {
            *err = -1;
            return;
        }
        wav->sound_buffer = (unsigned char *)bufffloat;
        for (i = 0; i < count; i++)
            bufffloat[i] = freadieee754f(fp, 0);
    }
    else if (is_float && wav->bits_per_sample == 64)
    {
        double *buffdouble;

        count = data_size / 8;
        /* elements are doubles: sizing by float would overrun the buffer */
        buffdouble = (double *)malloc((count ? count : 1) * sizeof(double));
        if (!buffdouble)
        {
            *err = -1;
            return;
        }
        wav->sound_buffer = (unsigned char *)buffdouble;
        for (i = 0; i < count; i++)
            buffdouble[i] = freadieee754(fp, 0);
    }

    /* the byte readers do not report end-of-file themselves */
    if (feof(fp) || ferror(fp))
        *err = -3;
}
/*
 * Skip the body of a chunk this reader does not interpret (the four byte id
 * has been consumed).
 *   fp  - stream positioned at the chunk size field
 *   err - set to -3 if the stream ends inside the chunk, otherwise unchanged
 *
 * RIFF chunks are word aligned, so an odd-sized chunk is followed by one pad
 * byte that is skipped as well.
 */
static void readunknownchunk(FILE *fp, int *err)
{
    /* fget32le sign-extends; mask so the size stays a 32-bit quantity */
    unsigned long remaining = (unsigned long)fget32le(fp) & 0xFFFFFFFFUL;

    remaining += remaining & 1UL;
    while (remaining--)
    {
        if (fgetc(fp) == EOF)
        {
            *err = -3;
            return;
        }
    }
}
/* Declaration only: no body for this function appears in this listing. */
static int wav_is_16bitstereo(WAV *wav);
/*
* read a double from a stream in ieee754 format regardless of host
* encoding.
* fp - the stream
* bigendian - set to if big bytes first, clear for little bytes
* first
*
*/
static double freadieee754(FILE *fp, int bigendian)
unsigned char buff[8];
int i;
double fnorm = 0.0;
unsigned char temp;
int sign;
int exponent;
double bitval;
int maski, mask;
int expbits = 11;
int significandbits = 52;
int shift;
double answer;
/* read the data */
for (i = 0; i < 8; i++)
buff[i] = fgetc(fp);
/* just reverse if not big-endian*/
if (!bigendian)
for (i = 0; i < 4; i++)
temp = buff[i];
buff[i] = buff[8 - i - 1];
buff[8 - i - 1] = temp;
sign = buff[0] & 0x80 ? -1 : 1;
/* exponet in raw format*/
exponent = ((buff[0] & 0x7F) << 4) | ((buff[1] & 0xF0) >> 4);
/* read inthe mantissa. Top bit is 0.5, the successive bits half*/
bitval = 0.5;
maski = 1;
mask = 0x08;
for (i = 0; i < significandbits; i++)
if (buff[maski] & mask)
fnorm += bitval;
bitval /= 2.0;
mask >>= 1;
if (mask == 0)
mask = 0x80;
maski++;
/* handle zero specially */
if (exponent == 0 && fnorm == 0)
return 0.0;
shift = exponent - ((1 << (expbits - 1)) - 1); /* exponent = shift + bias */
/* nans have exp 1024 and non-zero mantissa */
if (shift == 1024 && fnorm != 0)
return sqrt(-1.0);
/*infinity*/
if (shift == 1024 && fnorm == 0)
#ifdef INFINITY
return sign == 1 ? INFINITY : -INFINITY;
#endif
return (sign * 1.0) / 0.0;
if (shift > -1023)
answer = ldexp(fnorm + 1.0, shift);
return answer * sign;
else
/* denormalised numbers */
if (fnorm == 0.0)
return 0.0;
shift = -1022;
while (fnorm < 1.0)
fnorm *= 2;
shift--;
answer = ldexp(fnorm, shift);
return answer * sign;
static float freadieee754f(FILE *fp, int bigendian)
unsigned long buff = 0;
unsigned long buff2 = 0;
unsigned long mask;
int sign;
int exponent;
int shift;
int i;
int significandbits = 23;
int expbits = 8;
double fnorm = 0.0;
double bitval;
double answer;
for (i = 0; i<4; i++)
buff = (buff << 8) | fgetc(fp);
if (!bigendian)
for (i = 0; i<4; i++)
buff2 <<= 8;
buff2 |= (buff & 0xFF);
buff >>= 8;
buff = buff2;
sign = (buff & 0x80000000) ? -1 : 1;
mask = 0x00400000;
exponent = (buff & 0x7F800000) >> 23;
bitval = 0.5;
for (i = 0; i<significandbits; i++)
if (buff & mask)
fnorm += bitval;
bitval /= 2;
mask >>= 1;
if (exponent == 0 && fnorm == 0.0)
return 0.0f;
shift = exponent - ((1 << (expbits - 1)) - 1); /* exponent = shift + bias */
if (shift == 128 && fnorm != 0.0)
return (float)sqrt(-1.0);
if (shift == 128 && fnorm == 0.0)
#ifdef INFINITY
return sign == 1 ? INFINITY : -INFINITY;
#endif
return (sign * 1.0f) / 0.0f;
if (shift > -127)
answer = ldexp(fnorm + 1.0, shift);
return (float)answer * sign;
else
if (fnorm == 0.0)
return 0.0f;
shift = -126;
while (fnorm < 1.0)
fnorm *= 2;
shift--;
answer = ldexp(fnorm, shift);
return (float)answer * sign;
/*
 * Read a signed 16-bit little-endian integer from a stream.
 *   fp - the stream
 * Returns the value sign-extended to int. End-of-file is not detected here;
 * callers must check feof() on the stream if they need to know.
 */
static int fget16le(FILE *fp)
{
    int c1, c2;

    c1 = fgetc(fp);
    c2 = fgetc(fp);
    /* (c2 ^ 128) - 128 sign-extends the high byte */
    return ((c2 ^ 128) - 128) * 256 + c1;
}
/*
 * Read a signed 32-bit little-endian integer from a stream.
 *   fp - the stream
 * Returns the value sign-extended to long; callers that want an unsigned
 * 32-bit quantity must mask the result. End-of-file is not detected here;
 * callers must check feof() on the stream if they need to know.
 */
static long fget32le(FILE *fp)
{
    int c1, c2, c3, c4;

    c1 = fgetc(fp);
    c2 = fgetc(fp);
    c3 = fgetc(fp);
    c4 = fgetc(fp);
    /* (c4 ^ 128) - 128 sign-extends the most significant byte */
    return ((c4 ^ 128) - 128) * 256 * 256 * 256 + c3 * 256 * 256 + c2 * 256 + c1;
}
void wavfilemain(void)
int err;
WAV *wav = loadwav("C:\\Users\\Malcolm\\Documents\\Visual Studio 2013\\Projects\\ANSIScratch\\ANSIScratch\\LaserBlast.wav", &err);
short *samples;
long N;
long i;
printf("here %p\n", wav);
printf("%d fmt %d bits %d channels %d\n", wav->sample_rate, wav->format_tag, wav->bits_per_sample, wav->channels);
samples = wav_samplesasshort(wav, &N);
killwav(wav);
【讨论】:
我更喜欢通过 SoX 来实现,因为它相对轻量级并且只使用一行代码进行转换(而且我知道它可以工作)以上是关于有啥方法可以让 SoX 只打印 wav 文件中的幅度值?的主要内容,如果未能解决你的问题,请参考以下文章
当我尝试更改采样率时,为啥 sox 会损坏我的 wav 文件?