c_cpp 音频混音器:缩混5.1到立体声或重映射通道

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了c_cpp 音频混音器:缩混5.1到立体声或重映射通道相关的知识,希望对你有一定的参考价值。

#include "common.h"
#include "mixer.h"
#include <cassert>  // for assert
#include <cstdint>  // for uint32_t
#include <vector>   // for std::vector

using std::vector;

// Marker sample values used by the test. Every channel gets its own distinct
// value so an output sample reveals which input channel(s) it was built from.
const float M = 1.0f;     // Mono
const float L = 2.0f;     // Left
const float R = 3.0f;     // Right
const float C = 4.0f;     // Center
const float LS = 5.0f;    // Left Surround
const float RS = 6.0f;    // Right Surround
const float RLS = 7.0f;   // Rear Left Surround
const float RC = 8.0f;    // Rear Center
const float RRS = 9.0f;   // Rear Right Surround
const float LFE = 10.0f;  // Low Frequency Effects

// Coefficient used to build the expected results below. The expected values
// are compared against the mixer output with exact float equality.
const float INV_SQRT_2 = 0.707106f; // 1/sqrt(2)
// Expected samples of one output frame when the marker input above is
// downmixed from 3F2 or 3F2-LFE. Indexed as
//   [0 = 3F2 input, 1 = 3F2-LFE input][out_layout - SMPTE_MONO][output channel].
// Rows with fewer than 5 entries leave their remaining slots zero-initialized.
// With 3F2 input there is no LFE channel, so LFE outputs are expected to be 0.
const float DOWNMIX_3F2_RESULTS[2][12][5] = {
  // 3F2
  {
    { INV_SQRT_2*(L+R) + C + 0.5f*(LS+RS) },                          // Mono
    { INV_SQRT_2*(L+R) + C + 0.5f*(LS+RS), 0 },                       // Mono-LFE
    { L + INV_SQRT_2*(C+LS), R + INV_SQRT_2*(C+RS) },                 // Stereo
    { L + INV_SQRT_2*(C+LS), R + INV_SQRT_2*(C+RS), 0 },              // Stereo-LFE
    { L + INV_SQRT_2*LS, R + INV_SQRT_2*RS, C },                      // 3F
    { L + INV_SQRT_2*LS, R + INV_SQRT_2*RS, C, 0 },                   // 3F-LFE
    { L + C*INV_SQRT_2, R + C*INV_SQRT_2, INV_SQRT_2*(LS+RS) },       // 2F1
    { L + C*INV_SQRT_2, R + C*INV_SQRT_2, 0, INV_SQRT_2*(LS+RS) },    // 2F1-LFE
    { L, R, C, INV_SQRT_2*(LS+RS) },                                  // 3F1
    { L, R, C, 0, INV_SQRT_2*(LS+RS) },                               // 3F1-LFE
    { L + INV_SQRT_2*C, R + INV_SQRT_2*C, LS, RS },                   // 2F2
    { L + INV_SQRT_2*C, R + INV_SQRT_2*C, 0, LS, RS }                 // 2F2-LFE
  },
  // 3F2-LFE
  {
    { INV_SQRT_2*(L+R) + C + 0.5f*(LS+RS) },                          // Mono
    { INV_SQRT_2*(L+R) + C + 0.5f*(LS+RS), LFE },                     // Mono-LFE
    { L + INV_SQRT_2*(C+LS), R + INV_SQRT_2*(C+RS) },                 // Stereo
    { L + INV_SQRT_2*(C+LS), R + INV_SQRT_2*(C+RS), LFE },            // Stereo-LFE
    { L + INV_SQRT_2*LS, R + INV_SQRT_2*RS, C },                      // 3F
    { L + INV_SQRT_2*LS, R + INV_SQRT_2*RS, C, LFE },                 // 3F-LFE
    { L + C*INV_SQRT_2, R + C*INV_SQRT_2, INV_SQRT_2*(LS+RS) },       // 2F1
    { L + C*INV_SQRT_2, R + C*INV_SQRT_2, LFE, INV_SQRT_2*(LS+RS) },  // 2F1-LFE
    { L, R, C, INV_SQRT_2*(LS+RS) },                                  // 3F1
    { L, R, C, LFE, INV_SQRT_2*(LS+RS) },                             // 3F1-LFE
    { L + INV_SQRT_2*C, R + INV_SQRT_2*C, LS, RS },                   // 2F2
    { L + INV_SQRT_2*C, R + INV_SQRT_2*C, LFE, LS, RS }               // 2F2-LFE
  }
};

// A test fixture: a channel layout together with one frame of sample values
// stored in that layout's channel order.
struct audio_input {
  const LAYOUT layout;        // Layout the samples in `data` belong to.
  float data[CHANNEL_MAX];    // One sample per channel of `layout`.
};

// One fixture per layout, listed in LAYOUT enum order so that
// audio_inputs[layout] is the fixture for `layout` (downmix_test indexes the
// array this way). Each data row holds the marker values in the layout's
// channel order; slots beyond the layout's channel count are zero-initialized.
audio_input audio_inputs[SMPTE_MAX] = {
  { SMPTE_DUAL_MONO,      { L, R } },
  { SMPTE_DUAL_MONO_LFE,  { L, R, LFE } },
  { SMPTE_MONO,           { M } },
  { SMPTE_MONO_LFE,       { M, LFE } },
  { SMPTE_STEREO,         { L, R } },
  { SMPTE_STEREO_LFE,     { L, R, LFE } },
  { SMPTE_3F,             { L, R, C } },
  { SMPTE_3F_LFE,         { L, R, C, LFE } },
  { SMPTE_2F1,            { L, R, RC } },
  { SMPTE_2F1_LFE,        { L, R, LFE, RC } },
  { SMPTE_3F1,            { L, R, C, RC } },
  { SMPTE_3F1_LFE,        { L, R, C, LFE, RC } },
  { SMPTE_2F2,            { L, R, LS, RS } },
  { SMPTE_2F2_LFE,        { L, R, LFE, LS, RS } },
  { SMPTE_3F2,            { L, R, C, LS, RS } },
  { SMPTE_3F2_LFE,        { L, R, C, LFE, LS, RS } },
  { SMPTE_3F3R_LFE,       { L, R, C, LFE, RC, LS, RS } },
  { SMPTE_3F4_LFE,        { L, R, C, LFE, RLS, RRS, LS, RS } },
};

// Builds `inframes` identical frames from `data`, downmixes them from
// in_layout to out_layout with downmix_float, and asserts every output sample
// against the value expected from the strategy that applies to the layout
// pair: the 3F2 matrix, the by-name channel remap, or the plain fallback.
//
// data:       one frame of input samples in in_layout's channel order.
// in_layout:  layout of the generated input.
// out_layout: layout requested from downmix_float.
//
// Returns early (after logging) when downmix_float rejects the conversion.
void downmix_test(const float* data, LAYOUT in_layout, LAYOUT out_layout) {
  LOG("\nDownmix from %s to %s\n", CHANNEL_LAYOUT_MAPS[in_layout].name, CHANNEL_LAYOUT_MAPS[out_layout].name);

  const unsigned long inframes = 10;
  const unsigned int in_channels = CHANNEL_LAYOUT_MAPS[in_layout].channels;
  const unsigned int out_channels = CHANNEL_LAYOUT_MAPS[out_layout].channels;

  vector<float> in(in_channels * inframes);
  vector<float> out(out_channels * inframes);

  // Generate input data: every frame is a copy of `data`.
  for (unsigned int offset = 0 ; offset < inframes * in_channels ; offset += in_channels) {
    for (unsigned int i = 0 ; i < in_channels ; ++i) {
      in[offset + i] = data[i];
    }
  }

  if (!downmix_float(in.data(), inframes, out.data(), in_channels, out_channels, in_layout, out_layout)) {
    LOG("Invalid downmix\n");
    return;
  }

  // Bit masks of the channels present in each layout (bit n <=> CHANNEL n).
  uint32_t in_layout_mask = 0;
  for (unsigned int i = 0 ; i < in_channels ; ++i) {
    in_layout_mask |= 1 << CHANNEL_LAYOUT_MAPS[in_layout].channel_order[i];
  }

  uint32_t out_layout_mask = 0;
  for (unsigned int i = 0 ; i < out_channels ; ++i) {
    out_layout_mask |= 1 << CHANNEL_LAYOUT_MAPS[out_layout].channel_order[i];
  }

  // `i` walks every output sample of every frame; `index` is the channel
  // position of that sample inside its frame. Expected values are looked up by
  // `index`, while the actual value must be read at `i` so that all frames,
  // not just the first one, are verified.
  for (unsigned int i = 0 ; i < inframes * out_channels ; ++i) {
    unsigned int index = i % out_channels;

    // downmix_3f2
    if ((in_layout == SMPTE_3F2 || in_layout == SMPTE_3F2_LFE) &&
        out_layout >= SMPTE_MONO && out_layout <= SMPTE_2F2_LFE) {
      const float* downmix_results = DOWNMIX_3F2_RESULTS[in_layout - SMPTE_3F2][out_layout - SMPTE_MONO];
      LOG("[3f2] Expect: %lf, Get: %lf\n", downmix_results[index], out[i]);
      assert(out[i] == downmix_results[index]);
      continue;
    }

    // mix_remap: a channel shared by both layouts keeps its marker value,
    // a channel missing from the input is expected to be silent.
    if (out_layout_mask & in_layout_mask) {
      uint32_t mask = 1 << CHANNEL_LAYOUT_MAPS[out_layout].channel_order[index];
      LOG("[remap channels] Expect: %lf, Get: %lf\n", (mask & in_layout_mask) ? audio_inputs[out_layout].data[index] : 0, out[i]);
      assert(out[i] == ((mask & in_layout_mask) ? audio_inputs[out_layout].data[index] : 0));
      continue;
    }

    // downmix_fallback: the leading input channels are copied unchanged.
    LOG("[fallback] Expect: %lf, Get: %lf\n", audio_inputs[in_layout].data[index], out[i]);
    assert(out[i] == audio_inputs[in_layout].data[index]);
  }
}

// Runs downmix_test for every (input fixture, output layout) combination.
int main() {
  // ARRAY_LENGTH yields a size_t; using size_t counters avoids comparing a
  // signed index against an unsigned bound.
  for (size_t i = 0 ; i < ARRAY_LENGTH(audio_inputs) ; ++i) { // input
    for (size_t j = 0 ; j < ARRAY_LENGTH(CHANNEL_LAYOUT_MAPS) ; ++j) { // output
      downmix_test(audio_inputs[i].data, audio_inputs[i].layout, CHANNEL_LAYOUT_MAPS[j].layout);
    }
  }
  return 0;
}
#ifndef MIXER
#define MIXER

/*
 * Convert SMPTE 3F2(-LFE) input audio to output data in various other layouts.
 *
 * SMPTE channel layouts:
 * --------------------------------------------------
 * DUAL-MONO      L   R
 * DUAL-MONO-LFE  L   R   LFE
 * MONO           M
 * MONO-LFE       M   LFE
 * STEREO         L   R
 * STEREO-LFE     L   R   LFE
 * 3F             L   R   C
 * 3F-LFE         L   R   C    LFE
 * 2F1            L   R   S
 * 2F1-LFE        L   R   LFE  S
 * 3F1            L   R   C    S
 * 3F1-LFE        L   R   C    LFE S
 * 2F2            L   R   LS   RS
 * 2F2-LFE        L   R   LFE  LS   RS
 * 3F2            L   R   C    LS   RS
 * 3F2-LFE        L   R   C    LFE  LS   RS
 * 3F3R-LFE       L   R   C    LFE  BC   LS   RS
 * 3F4-LFE        L   R   C    LFE  RLS  RRS  LS   RS
 */

#ifdef __cplusplus
extern "C" {
#endif

// Speaker positions. The numeric value of each enumerator is also used as a
// bit position when building layout masks (1 << channel) and as the column
// index of CHANNEL_ORDERING, so the order of the enumerators is significant.
enum CHANNEL {
  CHANNEL_MONO = 0,
  CHANNEL_LEFT,
  CHANNEL_RIGHT,
  CHANNEL_CENTER,
  CHANNEL_LS,       // Left surround
  CHANNEL_RS,       // Right surround
  CHANNEL_RLS,      // Rear left surround
  CHANNEL_RCENTER,  // Rear center
  CHANNEL_RRS,      // Rear right surround
  CHANNEL_LFE,      // Low frequency effects
  CHANNEL_MAX // Max number of supported channels.
};

// Supported SMPTE channel layouts. The numeric value indexes
// CHANNEL_LAYOUT_MAPS and CHANNEL_ORDERING. The 3F2 downmix additionally
// relies on SMPTE_MONO .. SMPTE_2F2_LFE being contiguous and in this order,
// because its matrix is indexed by (out_layout - SMPTE_MONO).
enum LAYOUT {
  SMPTE_DUAL_MONO = 0,
  SMPTE_DUAL_MONO_LFE,
  SMPTE_MONO,
  SMPTE_MONO_LFE,
  SMPTE_STEREO,
  SMPTE_STEREO_LFE,
  SMPTE_3F,
  SMPTE_3F_LFE,
  SMPTE_2F1,
  SMPTE_2F1_LFE,
  SMPTE_3F1,
  SMPTE_3F1_LFE,
  SMPTE_2F2,
  SMPTE_2F2_LFE,
  SMPTE_3F2,
  SMPTE_3F2_LFE,
  SMPTE_3F3R_LFE,
  SMPTE_3F4_LFE,
  SMPTE_MAX // Max number of supported layouts.
};

// Static description of one channel layout.
typedef struct {
  const char* name;                          // Human-readable name, used in log output.
  const unsigned int channels;               // Number of channels in the layout.
  const LAYOUT layout;                       // The layout this entry describes.
  const CHANNEL channel_order[CHANNEL_MAX];  // Channel stored at each interleaved position; only the first `channels` entries are meaningful.
} layout_map;

// Layout descriptions, listed in LAYOUT enum order so that
// CHANNEL_LAYOUT_MAPS[layout] describes `layout`.
// Columns: name, channel count, layout, channel at each interleaved position.
const layout_map CHANNEL_LAYOUT_MAPS[SMPTE_MAX] = {
  { "dual mono",      2,  SMPTE_DUAL_MONO,      { CHANNEL_LEFT, CHANNEL_RIGHT } },
  { "dual mono lfe",  3,  SMPTE_DUAL_MONO_LFE,  { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_LFE } },
  { "mono",           1,  SMPTE_MONO,           { CHANNEL_MONO } },
  { "mono lfe",       2,  SMPTE_MONO_LFE,       { CHANNEL_MONO, CHANNEL_LFE } },
  { "stereo",         2,  SMPTE_STEREO,         { CHANNEL_LEFT, CHANNEL_RIGHT } },
  { "stereo lfe",     3,  SMPTE_STEREO_LFE,     { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_LFE } },
  { "3f",             3,  SMPTE_3F,             { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_CENTER } },
  { "3f lfe",         4,  SMPTE_3F_LFE,         { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_CENTER, CHANNEL_LFE } },
  { "2f1",            3,  SMPTE_2F1,            { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_RCENTER } },
  { "2f1 lfe",        4,  SMPTE_2F1_LFE,        { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_LFE, CHANNEL_RCENTER } },
  { "3f1",            4,  SMPTE_3F1,            { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_CENTER, CHANNEL_RCENTER } },
  { "3f1 lfe",        5,  SMPTE_3F1_LFE,        { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_CENTER, CHANNEL_LFE, CHANNEL_RCENTER } },
  { "2f2",            4,  SMPTE_2F2,            { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_LS, CHANNEL_RS } },
  { "2f2 lfe",        5,  SMPTE_2F2_LFE,        { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_LFE, CHANNEL_LS, CHANNEL_RS } },
  { "3f2",            5,  SMPTE_3F2,            { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_CENTER, CHANNEL_LS, CHANNEL_RS } },
  { "3f2 lfe",        6,  SMPTE_3F2_LFE,        { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_CENTER, CHANNEL_LFE, CHANNEL_LS, CHANNEL_RS } },
  { "3f3r lfe",       7,  SMPTE_3F3R_LFE,       { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_CENTER, CHANNEL_LFE, CHANNEL_RCENTER, CHANNEL_LS, CHANNEL_RS } },
  { "3f4 lfe",        8,  SMPTE_3F4_LFE,        { CHANNEL_LEFT, CHANNEL_RIGHT, CHANNEL_CENTER, CHANNEL_LFE, CHANNEL_RLS, CHANNEL_RRS, CHANNEL_LS, CHANNEL_RS } },
};

// Downmix interleaved float audio from in_layout to out_layout.
//
// in:           inframes * in_channels interleaved input samples.
// inframes:     number of frames to convert.
// out:          receives inframes * out_channels interleaved output samples.
// in_channels:  channels per input frame.
// out_channels: channels per output frame.
// in_layout:    layout of the input data.
// out_layout:   layout requested for the output data.
//
// Returns false without converting when in_channels < out_channels or when
// both layouts are the same; returns true otherwise.
bool downmix_float(const float* const in, unsigned long inframes, float* out,
                   unsigned int in_channels, unsigned int out_channels,
                   LAYOUT in_layout, LAYOUT out_layout);

#ifdef __cplusplus
}
#endif

#endif // MIXER
#include "mixer.h"
#include <cassert>  // for assert
#include <cstdint>  // for uint32_t

// Inverse of CHANNEL_LAYOUT_MAPS[..].channel_order: CHANNEL_ORDERING[layout][channel]
// is the interleaved position of `channel` inside a frame of `layout`, or -1
// when the layout does not contain that channel. Rows follow LAYOUT enum
// order, columns follow CHANNEL enum order.
const int CHANNEL_ORDERING[SMPTE_MAX][CHANNEL_MAX] = {
  // M | L | R | C | LS | RS | RLS | RC | RRS | LFE
  {  -1,  0,  1, -1,  -1,  -1,   -1,  -1,   -1,  -1 },  // DUAL_MONO
  {  -1,  0,  1, -1,  -1,  -1,   -1,  -1,   -1,   2 },  // DUAL_MONO_LFE
  {   0, -1, -1, -1,  -1,  -1,   -1,  -1,   -1,  -1 },  // MONO
  {   0, -1, -1, -1,  -1,  -1,   -1,  -1,   -1,   1 },  // MONO_LFE
  {  -1,  0,  1, -1,  -1,  -1,   -1,  -1,   -1,  -1 },  // STEREO
  {  -1,  0,  1, -1,  -1,  -1,   -1,  -1,   -1,   2 },  // STEREO_LFE
  {  -1,  0,  1,  2,  -1,  -1,   -1,  -1,   -1,  -1 },  // 3F
  {  -1,  0,  1,  2,  -1,  -1,   -1,  -1,   -1,   3 },  // 3F_LFE
  {  -1,  0,  1, -1,  -1,  -1,   -1,   2,   -1,  -1 },  // 2F1
  {  -1,  0,  1, -1,  -1,  -1,   -1,   3,   -1,   2 },  // 2F1_LFE
  {  -1,  0,  1,  2,  -1,  -1,   -1,   3,   -1,  -1 },  // 3F1
  {  -1,  0,  1,  2,  -1,  -1,   -1,   4,   -1,   3 },  // 3F1_LFE
  {  -1,  0,  1, -1,   2,   3,   -1,  -1,   -1,  -1 },  // 2F2
  {  -1,  0,  1, -1,   3,   4,   -1,  -1,   -1,   2 },  // 2F2_LFE
  {  -1,  0,  1,  2,   3,   4,   -1,  -1,   -1,  -1 },  // 3F2
  {  -1,  0,  1,  2,   4,   5,   -1,  -1,   -1,   3 },  // 3F2_LFE
  {  -1,  0,  1,  2,   5,   6,   -1,   4,   -1,   3 },  // 3F3R_LFE
  {  -1,  0,  1,  2,   6,   7,    4,  -1,    5,   3 },  // 3F4_LFE
};

// The downmix matrix from TABLE 2 in ITU-R BS.775-3[1] defines how to convert
// 3F2 input data to 1F, 2F, 3F, 2F1, 3F1 and 2F2 output data. We extend it to
// also accept 3F2-LFE input and to produce the LFE variant of each of those
// output layouts.
// [1] https://www.itu.int/dms_pubrec/itu-r/rec/bs/R-REC-BS.775-3-201208-I!!PDF-E.pdf

// Number of converted layouts: 1F, 2F, 3F, 2F1, 3F1, 2F2 and their LFEs
// (SMPTE_MONO through SMPTE_2F2_LFE).
const int SUPPORTED_LAYOUT_NUM = 12;
// Number of input channel for downmix conversion.
const int INPUT_CHANNEL_NUM = 6; // 3F2-LFE
// Max number of possible output channels.
const int MAX_OUTPUT_CHANNEL_NUM = 5; // 2F2-LFE or 3F1-LFE
// Truncated (not rounded) approximation of 1/sqrt(2) ~= 0.70710678.
const float INV_SQRT_2 = 0.707106f; // 1/sqrt(2)
// Downmix coefficients, indexed as
//   [out_layout - SMPTE_MONO][output channel][input channel].
// Each array contains coefficients that will be multiplied with { L, R, C, LFE, LS, RS } channels respectively.
// Output rows that are not listed for a layout are zero-initialized.
const float DOWNMIX_MATRIX_3F2_LFE[SUPPORTED_LAYOUT_NUM][MAX_OUTPUT_CHANNEL_NUM][INPUT_CHANNEL_NUM] =
{
// 1F Mono
  {
    { INV_SQRT_2, INV_SQRT_2, 1, 0, 0.5, 0.5 }  // M
  },
// 1F Mono-LFE
  {
    { INV_SQRT_2, INV_SQRT_2, 1, 0, 0.5, 0.5 }, // M
    { 0, 0, 0, 1, 0, 0 }                        // LFE
  },
// 2F Stereo
  {
    { 1, 0, INV_SQRT_2, 0, INV_SQRT_2, 0 },     // L
    { 0, 1, INV_SQRT_2, 0, 0, INV_SQRT_2 }      // R
  },
// 2F Stereo-LFE
  {
    { 1, 0, INV_SQRT_2, 0, INV_SQRT_2, 0 },     // L
    { 0, 1, INV_SQRT_2, 0, 0, INV_SQRT_2 },     // R
    { 0, 0, 0, 1, 0, 0 }                        // LFE
  },
// 3F
  {
    { 1, 0, 0, 0, INV_SQRT_2, 0 },              // L
    { 0, 1, 0, 0, 0, INV_SQRT_2 },              // R
    { 0, 0, 1, 0, 0, 0 }                        // C
  },
// 3F-LFE
  {
    { 1, 0, 0, 0, INV_SQRT_2, 0 },              // L
    { 0, 1, 0, 0, 0, INV_SQRT_2 },              // R
    { 0, 0, 1, 0, 0, 0 },                       // C
    { 0, 0, 0, 1, 0, 0 }                        // LFE
  },
// 2F1
  {
    { 1, 0, INV_SQRT_2, 0, 0, 0 },              // L
    { 0, 1, INV_SQRT_2, 0, 0, 0 },              // R
    { 0, 0, 0, 0, INV_SQRT_2, INV_SQRT_2 }      // S
  },
// 2F1-LFE
  {
    { 1, 0, INV_SQRT_2, 0, 0, 0 },              // L
    { 0, 1, INV_SQRT_2, 0, 0, 0 },              // R
    { 0, 0, 0, 1, 0, 0 },                       // LFE
    { 0, 0, 0, 0, INV_SQRT_2, INV_SQRT_2 }      // S
  },
// 3F1
  {
    { 1, 0, 0, 0, 0, 0 },                       // L
    { 0, 1, 0, 0, 0, 0 },                       // R
    { 0, 0, 1, 0, 0, 0 },                       // C
    { 0, 0, 0, 0, INV_SQRT_2, INV_SQRT_2 }      // S
  },
// 3F1-LFE
  {
    { 1, 0, 0, 0, 0, 0 },                       // L
    { 0, 1, 0, 0, 0, 0 },                       // R
    { 0, 0, 1, 0, 0, 0 },                       // C
    { 0, 0, 0, 1, 0, 0 },                       // LFE
    { 0, 0, 0, 0, INV_SQRT_2, INV_SQRT_2 }      // S
  },
// 2F2
  {
    { 1, 0, INV_SQRT_2, 0, 0, 0 },              // L
    { 0, 1, INV_SQRT_2, 0, 0, 0 },              // R
    { 0, 0, 0, 0, 1, 0 },                       // LS
    { 0, 0, 0, 0, 0, 1 }                        // RS
  },
// 2F2-LFE
  {
    { 1, 0, INV_SQRT_2, 0, 0, 0 },              // L
    { 0, 1, INV_SQRT_2, 0, 0, 0 },              // R
    { 0, 0, 0, 1, 0, 0 },                       // LFE
    { 0, 0, 0, 0, 1, 0 },                       // LS
    { 0, 0, 0, 0, 0, 1 }                        // RS
  }
};

// Matrix downmix of interleaved 3F2 / 3F2-LFE frames to any layout from mono
// up to 2F2-LFE, using DOWNMIX_MATRIX_3F2_LFE.
//
// Returns false, without touching `out`, when in_layout is neither 3F2 nor
// 3F2-LFE or when out_layout lies outside SMPTE_MONO..SMPTE_2F2_LFE.
// Returns true after writing inframes frames of out_layout's width to `out`.
template<class T>
bool downmix_3f2(const T* const in, unsigned long inframes, T* out, LAYOUT in_layout, LAYOUT out_layout) {
  const bool input_has_lfe = (in_layout == SMPTE_3F2_LFE);
  if (!input_has_lfe && in_layout != SMPTE_3F2) {
    return false;
  }
  if (out_layout < SMPTE_MONO || out_layout > SMPTE_2F2_LFE) {
    return false;
  }

  const unsigned int src_width = CHANNEL_LAYOUT_MAPS[in_layout].channels;
  const unsigned int dst_width = CHANNEL_LAYOUT_MAPS[out_layout].channels;

  // Equal widths are legal (e.g. 3F2 -> 2F2-LFE or 3F1-LFE), hence '<='.
  assert(dst_width <= src_width);

  // Position of the LFE column in the matrix's { L, R, C, LFE, LS, RS } order.
  const unsigned int lfe_column = 3;
  // The matrix's first entry corresponds to mono.
  auto & coefficients = DOWNMIX_MATRIX_3F2_LFE[out_layout - SMPTE_MONO];

  const unsigned long src_total = inframes * src_width;
  long dst_pos = 0;
  unsigned long src_pos = 0;
  while (src_pos < src_total) {
    for (unsigned int row = 0; row < dst_width; ++row) {
      T & mixed = out[dst_pos + row];
      mixed = 0; // Start each output sample from silence.
      for (unsigned int col = 0 ; col < INPUT_CHANNEL_NUM ; ++col) {
        // A 3F2 frame is L, R, C, LS, RS. It is treated as a 3F2-LFE frame
        // whose LFE sample is 0, so columns after the LFE column read one
        // position earlier in the input frame.
        T sample;
        if (input_has_lfe) {
          sample = in[src_pos + col];
        } else if (col == lfe_column) {
          sample = 0;
        } else {
          sample = in[src_pos + ((col < lfe_column) ? col : col - 1)];
        }
        mixed += coefficients[row][col] * sample;
      }
    }
    dst_pos += dst_width;
    src_pos += src_width;
  }

  return true;
}

/* Map the audio data by channel name. */
template<class T>
bool mix_remap(const T* const in, unsigned long inframes, T* out, LAYOUT in_layout, LAYOUT out_layout) {
  assert(in_layout != out_layout);
  unsigned int in_channels = CHANNEL_LAYOUT_MAPS[in_layout].channels;
  unsigned int out_channels = CHANNEL_LAYOUT_MAPS[out_layout].channels;

  uint32_t in_layout_mask = 0;
  for (unsigned int i = 0 ; i < in_channels ; ++i) {
    in_layout_mask |= 1 << CHANNEL_LAYOUT_MAPS[in_layout].channel_order[i];
  }

  uint32_t out_layout_mask = 0;
  for (unsigned int i = 0 ; i < out_channels ; ++i) {
    out_layout_mask |= 1 << CHANNEL_LAYOUT_MAPS[out_layout].channel_order[i];
  }

  // If there is no matched channel, then do nothing.
  if (!(out_layout_mask & in_layout_mask)) {
    return false;
  }

  long out_index = 0;
  for (unsigned long i = 0; i < inframes * in_channels; i += in_channels) {
    for (unsigned int j = 0; j < out_channels; ++j) {
      CHANNEL channel = CHANNEL_LAYOUT_MAPS[out_layout].channel_order[j];
      uint32_t channel_mask = 1 << channel;
      // out[out_index + j] = (in_layout_mask & channel_mask) ? in[ i + CHANNEL_ORDERING[in_layout][channel] ] : 0;
      int channel_index = CHANNEL_ORDERING[in_layout][channel];
      if (in_layout_mask & channel_mask) {
        assert(channel_index != -1);
        out[out_index + j] = in[i + channel_index];
      } else {
        assert(channel_index == -1);
        out[out_index + j] = 0;
      }
    }
    out_index += out_channels;
  }

  return true;
}

/*
 * Keep the first out_channels samples of every input frame and discard the
 * rest. Requires in_channels >= out_channels. Always returns true.
 */
template<class T>
bool downmix_fallback(const T* const in, unsigned long inframes, T* out, unsigned int in_channels, unsigned int out_channels) {
  assert(in_channels >= out_channels);

  const unsigned long in_total = inframes * in_channels;
  long write_pos = 0;
  unsigned long read_pos = 0;
  while (read_pos < in_total) {
    for (unsigned int ch = 0; ch < out_channels; ++ch) {
      out[write_pos + ch] = in[read_pos + ch];
    }
    read_pos += in_channels;
    write_pos += out_channels;
  }

  return true;
}

// Dispatches a downmix request to the first strategy that accepts it:
// the 3F2 matrix, then the by-name channel remap, then the plain fallback
// that drops the extra channels.
//
// Returns false when the request is an upmix (in_channels < out_channels) or
// when both layouts are the same. The layout-aware strategies are only tried
// when the given channel counts agree with the layouts' own channel counts.
template<class T>
bool downmix(const T* const in, unsigned long inframes, T* out,
             unsigned int in_channels, unsigned int out_channels,
             LAYOUT in_layout, LAYOUT out_layout) {
  if (in_layout == out_layout) {
    return false;
  }
  if (in_channels < out_channels) {
    return false;
  }

  const bool counts_match_layouts =
    CHANNEL_LAYOUT_MAPS[in_layout].channels == in_channels &&
    CHANNEL_LAYOUT_MAPS[out_layout].channels == out_channels;

  // '||' short-circuits, so mix_remap only runs when downmix_3f2 declined.
  if (counts_match_layouts &&
      (downmix_3f2(in, inframes, out, in_layout, out_layout) ||
       mix_remap(in, inframes, out, in_layout, out_layout))) {
    return true;
  }

  return downmix_fallback(in, inframes, out, in_channels, out_channels);
}

bool
downmix_float(const float* const in, unsigned long inframes, float* out,
              unsigned int in_channels, unsigned int out_channels,
              LAYOUT in_layout, LAYOUT out_layout)
{
  return downmix(in, inframes, out, in_channels, out_channels, in_layout, out_layout);
}
CC=g++
CFLAGS=-Wall -std=c++14 -Wc++11-extensions

# `all` and `clean` name actions rather than files; declaring them phony keeps
# a stray file with one of those names from disabling the rule.
.PHONY: all clean

all: test_mixer

# Relink the test whenever its source, the mixer object or a header changes.
test_mixer: test_mixer.cpp mixer.o common.h mixer.h
	$(CC) $(CFLAGS) test_mixer.cpp mixer.o -o test_mixer

mixer.o: mixer.cpp mixer.h
# -c: Only run preprocess, compile, and assemble steps
# -g: Generate source-level debug information
	$(CC) $(CFLAGS) -c -g mixer.cpp

# -f: do not fail when there is nothing to remove.
clean:
	rm -f test_mixer *.o
/*
 * Convert SMPTE 3F2 audio data from 5 channels to 1 ~ 4 channel data with
 * different layouts.
 *
 * SMPTE channel layouts:
 * --------------------------------------------------
 * DUAL-MONO      L   R
 * DUAL-MONO-LFE  L   R   LFE
 * MONO           M
 * MONO-LFE       M   LFE
 * STEREO         L   R
 * STEREO-LFE     L   R   LFE
 * 3F             L   R   C
 * 3F-LFE         L   R   C    LFE
 * 2F1            L   R   S
 * 2F1-LFE        L   R   LFE  S
 * 3F1            L   R   C    S
 * 3F1-LFE        L   R   C    LFE S
 * 2F2            L   R   LS   RS
 * 2F2-LFE        L   R   LFE  LS   RS
 * 3F2            L   R   C    LS   RS
 * 3F2-LFE        L   R   C    LFE  LS   RS
 * 3F3R-LFE       L   R   C    LFE  BC   LS   RS
 * 3F4-LFE        L   R   C    LFE  Rls  Rrs  LS   RS
 */
#include <stdio.h>  // for basic io.
#include <stdlib.h> // for malloc.

#define DEBUG 0 // Set 1 to log the debugging messages.
// Prints to stdout only when DEBUG is non-zero. The expansion is fully
// parenthesized so that LOG(...) acts as a single operand when it appears
// inside a larger expression.
#define LOG(...) (DEBUG && fprintf(stdout, __VA_ARGS__))

// Number of elements of a true array. This can NOT be used on a pointer (for
// example a dynamically allocated array): sizeof would then measure the
// pointer, not the array.
#define ARRAY_SIZE(x) ((int) (sizeof(x) / sizeof((x)[0])))

// For input channel data: one distinct sample value per 3F2 channel, so the
// contribution of each input channel is recognizable in the output.
const float L = 1;   // Left
const float R = 2;   // Right
const float C = 3;   // Center
const float LS = 4;  // Left surround
const float RS = 5;  // Right surround
#define INPUT_CHANNEL_NUMS 5 // 3F2 has 5 channels.

// Output layouts supported by this program. The numeric value is used as the
// first index of DOWNMIX_MATRIX.
enum LAYOUT {
  SMPTE_MONO = 0,
  SMPTE_STEREO,
  SMPTE_3F,
  SMPTE_2F1,
  SMPTE_3F1,
  SMPTE_2F2,
  SMPTE_MAX, // Max number of supported layouts.
};
#define MAX_OUTPUT_CHANNEL_NUMS 4 // Max number of channel supported for the layout.

// Static description of one output layout.
typedef struct {
  // Human-readable name. It is initialized from string literals, which must
  // never be written through, so the pointee is const.
  const char* name;
  unsigned int channels;  // Number of channels in the layout.
  enum LAYOUT layout;     // The layout this entry describes.
} layout_map;

// Every supported output layout: name, channel count, layout id.
static const layout_map CHANNEL_LAYOUT_MAPS[] = {
  { "mono", 1, SMPTE_MONO },
  { "stereo", 2, SMPTE_STEREO },
  { "3f", 3, SMPTE_3F },
  { "2f1", 3, SMPTE_2F1 },
  { "3f1", 4, SMPTE_3F1 },
  { "2f2", 4, SMPTE_2F2 },
};

// Constant for downmix coefficients matrix.
const float SQRT_1_2 = 0.70710678118; // 1/sqrt(2)

// The following matrix is taken from TABLE 2 in ITU-R BS.775-3.
// https://www.itu.int/dms_pubrec/itu-r/rec/bs/R-REC-BS.775-3-201208-I!!PDF-E.pdf
//
// Indexed as [output layout][output channel][input channel].
// Each array contains coefficients that will be multiplied with
// { L, R, C, LS, RS } channels respectively.
// Output rows that are not listed for a layout are zero-initialized.
const float DOWNMIX_MATRIX[SMPTE_MAX][MAX_OUTPUT_CHANNEL_NUMS][INPUT_CHANNEL_NUMS] =
{
// 1F Mono
  {
    { SQRT_1_2, SQRT_1_2, 1, 0.5, 0.5 },  // M
  },
// 2F Stereo
  {
    { 1, 0, SQRT_1_2, SQRT_1_2, 0 },      // L
    { 0, 1, SQRT_1_2, 0, SQRT_1_2 }       // R
  },
// 3F
  {
    { 1, 0, 0, SQRT_1_2, 0 },             // L
    { 0, 1, 0, 0, SQRT_1_2 },             // R
    { 0, 0, 1, 0, 0 }                     // C
  },
// 2F1
  {
    { 1, 0, SQRT_1_2, 0, 0 },             // L
    { 0, 1, SQRT_1_2, 0, 0 },             // R
    { 0, 0, 0, SQRT_1_2, SQRT_1_2 }       // S
  },
// 3F1
  {
    { 1, 0, 0, 0, 0 },                    // L
    { 0, 1, 0, 0, 0 },                    // R
    { 0, 0, 1, 0, 0 },                    // C
    { 0, 0, 0, SQRT_1_2, SQRT_1_2 },      // S
  },
// 2F2
  {
    { 1, 0, SQRT_1_2, 0, 0 },             // L
    { 0, 1, SQRT_1_2, 0, 0 },             // R
    { 0, 0, 0, 1, 0 },                    // LS
    { 0, 0, 0, 0, 1 },                    // RS
  },
};

// Downmix one 3F2 frame.
//
// in:         INPUT_CHANNEL_NUMS samples in { L, R, C, LS, RS } order.
// out:        receives out_ch_num samples.
// out_ch_num: number of output channels to produce.
// out_type:   first index into DOWNMIX_MATRIX (main passes a LAYOUT value).
void downmix(float* in, float* out, unsigned const int out_ch_num, unsigned const int out_type) {
  unsigned int row = 0;
  while (row < out_ch_num) {
    // Coefficients for this output channel, one per input channel.
    const float* weights = DOWNMIX_MATRIX[out_type][row];
    float* mixed = &out[row];
    *mixed = 0; // Start from silence before accumulating.
    for (unsigned int col = 0 ; col < INPUT_CHANNEL_NUMS ; ++col) {
      *mixed += weights[col] * in[col];
    }
    ++row;
  }
}

// Downmixes one fixed 3F2 frame to every layout in CHANNEL_LAYOUT_MAPS and
// logs the resulting channels. Returns EXIT_FAILURE if an output buffer
// cannot be allocated, 0 otherwise.
int main() {
  float input[INPUT_CHANNEL_NUMS] = { L, R, C, LS, RS }; // 3F2 input data.

  // Downmix for each layout.
  for (int i = 0 ; i < ARRAY_SIZE(CHANNEL_LAYOUT_MAPS) ; ++i) {
    LOG("\n%s\n", CHANNEL_LAYOUT_MAPS[i].name);

    const unsigned int channels = CHANNEL_LAYOUT_MAPS[i].channels;
    float* output = (float*) malloc(sizeof(float) * channels);
    if (!output) {
      // downmix would write through a null pointer; stop instead.
      fprintf(stderr, "Failed to allocate the output buffer\n");
      return EXIT_FAILURE;
    }
    downmix(input, output, channels, CHANNEL_LAYOUT_MAPS[i].layout);

    // The cast keeps the comparison signed/signed; j stays an int because it
    // is printed with %d.
    for (int j = 0 ; j < (int) channels ; ++j) {
      LOG("ch[%d] = %f\n", j, output[j]);
    }

    free(output);
  }

  return 0;
}
#ifndef COMMON
#define COMMON

#include <cstdio>   // for fprintf and stdout, used by LOG
#include <iostream>
#include <unistd.h>

#define DEBUG false // Set true to log the debugging messages.
// Prints to stdout only when DEBUG is true. The expansion is fully
// parenthesized so that LOG(...) acts as a single operand when it appears
// inside a larger expression.
#define LOG(...) (DEBUG && fprintf(stdout, __VA_ARGS__))

// Number of elements of a true array, deduced at compile time from the
// array's type. Pointers are rejected at compile time because they do not
// bind to the array-reference parameter.
template<typename Element, size_t Count>
constexpr size_t ARRAY_LENGTH(Element (&)[Count]) { return Count; }

#endif /* COMMON */

以上是关于c_cpp 音频混音器:缩混5.1到立体声或重映射通道的主要内容,如果未能解决你的问题,请参考以下文章

音频处理使用 Adobe Audition 录制电脑内部声音 ( 启用电脑立体声混音 | Adobe Audition 中设置音频设备 | Adobe Audition 内录 )

windows编程:在没有支持“立体声混音”的驱动程序的情况下抓取音频

如何在 java/xuggler 中混音多个音频通道?

mp3格式转换后没有了立体声

Adobe Auditon使用功能:将音频文件切分成多段,并将每段音频分别保存到不同的文件中

是否可以在不重新编码的情况下将立体声合并/缩混为单声道(m4a 或 opus)