HTML5 audio + speech-to-text... once the microphone and speech recognition are enabled, say "facebook", "hack" or "what is my name"

This post walks through a demo page that combines HTML5 Web Audio microphone capture with the Web Speech API: once the microphone and speech recognition are enabled, say "facebook", "hack" or "what is my name" and the page reacts to the recognized phrase. The full page source follows.

<!DOCTYPE html>
<html>
<head>
  <title>HTML5 Web audio capture + Speech Recognition</title>
  <link href="http://www.smartjava.org/examples/webaudio-filters/css/bootstrap.css" rel="stylesheet"/>
</head>

<body>
  <div class="container">
    <h1 class="pagination-centered">HTML5 Web audio capture + Speech Recognition</h1><br />

    <div class="span1">
      <span>Microphone:</span>
    </div>

    <div class="span2">
      <div class="btn-toolbar" style="margin-top: -7px; margin-left: 10px">
        <div class="btn-group">
          <a class="btn btn-primary" id="mic-start" href="#"><i id="mic-start-icon" class="icon-play icon-white"></i></a>
          <a class="btn btn-primary disabled" id="mic-stop" href="#"><i id="mic-stop-icon" class="icon-stop icon-white"></i></a>
        </div>
      </div>
    </div>

    <div class="span1">
      <span>Recognition:</span>
    </div>

    <div class="span2">
      <div class="btn-toolbar" style="margin-top: -7px; margin-left: 10px">
        <div class="btn-group">
          <a class="btn btn-primary" id="recognition-start" href="#"><i id="recognition-start-icon" class="icon-play icon-white"></i></a>
          <a class="btn btn-primary disabled" id="recognition-stop" href="#"><i id="recognition-stop-icon" class="icon-stop icon-white"></i></a>
        </div>
      </div>
    </div>

  </div>

  <div class="container">
    <div class="row">
      <div class="span4">
        Spectrogram:
        <canvas id="spectrogram" width="300" height="256" style="display: block; background-color: black ;"></canvas>
      </div>

      <div class="span4">
        Waveform:
        <canvas id="waveform" width="300" height="256" style="display: block; background-color: black ;"></canvas>
      </div>

      <div class="span4">
        Wave:
        <canvas id="wave" width="530" height="100" style="display: block; background-color: white ;"></canvas>
      </div>
    </div>
  </div>

  <div class="container">
    <div id="results">
      <span class="final" id="final_span"></span> <span class="interim" id=
      "interim_span"></span>
    </div>
  </div>

<script src="http://www.smartjava.org/examples/webaudio-filters/js/jquery-1.8.2.js"></script>
<script src="http://www.smartjava.org/examples/webaudio-filters/js/jquery-ui-1.9.1.custom.js"></script>
<script src="http://www.smartjava.org/examples/webaudio-filters/js/bootstrap.min.js"></script>
<script src="http://www.smartjava.org/examples/webaudio-filters/js/chroma.js"></script>
<script>
///////////////////////////////////////////////////////////////////////////
// Web Audio API
///////////////////////////////////////////////////////////////////////////

// some globals
var context = new (window.AudioContext || window.webkitAudioContext)(); // unprefixed where available
var mediaStreamSource;
var filter = context.createBiquadFilter();

// with a gain of 0 dB this lowshelf filter is effectively a pass-through;
// it just gives the graph a node to tap for analysis
filter.type = "lowshelf"; // string form; the deprecated numeric constant 3 meant LOWSHELF
filter.frequency.value = 440;
filter.Q.value = 0;
filter.gain.value = 0;


// state variables
var micRunning = false;

// set up a script processor node (createJavaScriptNode was renamed to createScriptProcessor)
var javascriptNode = context.createScriptProcessor(2048, 1, 1);

// connect to destination, else it isn't called
javascriptNode.connect(context.destination);
// each time the script processor fires we pull the analyser's
// current data and redraw the visualisations
javascriptNode.onaudioprocess = function () {

    // grab the current frequency-domain data from the analyser
    var array = new Uint8Array(analyser.frequencyBinCount);
    analyser.getByteFrequencyData(array);

    if (micRunning) {
      drawSpectrogram(array);

      // time-domain samples for the scrolling wave displays
      var array2 = new Uint8Array(analyser.frequencyBinCount);
      analyser.getByteTimeDomainData(array2);
      drawWave(array2);
      drawWaveform(array2);
    }

}

// set up an analyser node
var analyser = context.createAnalyser();
analyser.smoothingTimeConstant = 0;
analyser.fftSize = 512;

// feed the filter output into the analyser, and the analyser into the script processor
filter.connect(analyser);
analyser.connect(javascriptNode);
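// once the mic is started (see setupHandlers) the full graph is:
// mediaStreamSource -> filter -> analyser -> javascriptNode -> context.destination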

// used for color distribution
var hot = new chroma.ColorScale({
    colors:['#000000', '#ff0000', '#ffff00', '#ffffff'],
    positions:[0, .25, .75, 1],
    mode:'rgb',
    limits:[0, 350]
});

$(document).ready(function () {
    setupHandlers();
});

function setupHandlers() {
  $("#mic-start").click(function () {
    navigator.webkitGetUserMedia({audio:true},function(stream) {
      mediaStreamSource = context.createMediaStreamSource(stream);
      mediaStreamSource.connect(filter);

      micRunning = true;
      $("#mic-start").addClass("disabled");
      $("#mic-stop").removeClass("disabled");
    });
  });

  $("#mic-stop").click(function () {
    mediaStreamSource.disconnect(); // argument-less disconnect works across API versions
    micRunning = false;

    $("#mic-stop").addClass("disabled");
    $("#mic-start").removeClass("disabled");
  });
}

// log if an error occurs
function onError(e) {
  console.log(e);
}

function drawWave(array) {
    var ctx = $("#wave").get()[0].getContext("2d");
    ctx.fillStyle = "#000000";
    ctx.clearRect(0, 0, 530, 100);

    for (var i = 0; i < array.length; i++) {
      // byte time-domain samples run 0-255 with silence at 128;
      // scale them into the 100px-high canvas so peaks are not drawn off-screen
      var value = array[i];
      ctx.fillRect(i, 100 - (value / 256) * 100, 1, 1);
    }
}

function drawWaveform(array) {
  var ctx = $("#waveform").get()[0].getContext("2d");
  // create a temp canvas we use for copying (same size as the waveform canvas)
  var tempCanvas = document.createElement("canvas");
  tempCanvas.width = 300;
  tempCanvas.height = 256;
  var tempCtx = tempCanvas.getContext("2d");

  // copy the current canvas onto the temp canvas
  var canvas = document.getElementById("waveform");
  tempCtx.drawImage(canvas, 0, 0, 300, 256);

  // draw the min..max range of this block as a coloured vertical strip
  // at the right side of the canvas
  var min = Math.min.apply(Math, array);
  var max = Math.max.apply(Math, array);
  for (var value = min; value <= max; value++) {
    ctx.fillStyle = hot.getColor(value).hex();
    ctx.fillRect(300 - 1, 250 - (value / 1.5), 1, 1);
  }

  // scroll the previous frame one pixel to the left and restore it
  ctx.translate(-1, 0);
  ctx.drawImage(tempCanvas, 0, 0, 300, 256, 0, 0, 300, 256);

  // reset the transformation matrix
  ctx.setTransform(1, 0, 0, 1, 0, 0);
}


function drawSpectrogram(array) {
  var ctx = $("#spectrogram").get()[0].getContext("2d");
  // create a temp canvas we use for copying (same size as the spectrogram canvas)
  var tempCanvas = document.createElement("canvas");
  tempCanvas.width = 300;
  tempCanvas.height = 256;
  var tempCtx = tempCanvas.getContext("2d");

  // copy the current canvas onto the temp canvas
  var canvas = document.getElementById("spectrogram");
  tempCtx.drawImage(canvas, 0, 0, 300, 256);

  // iterate over the elements from the array
  for (var i = 0; i < array.length; i++) {
    // draw each pixel with the specific color
    var value = array[i];
    ctx.fillStyle = hot.getColor(value).hex();

    // draw the line at the right side of the canvas
    ctx.fillRect(300 - 1, 256 - i, 1, 1);
  }

  // shift the previous frame one pixel to the left
  ctx.translate(-1, 0);
  // draw the copied image (not the live canvas element) back
  ctx.drawImage(tempCanvas, 0, 0, 300, 256, 0, 0, 300, 256);

  // reset the transformation matrix
  ctx.setTransform(1, 0, 0, 1, 0, 0);
}

/////////////////////////////////////////////////////////////
// Voice Rec
/////////////////////////////////////////////////////////////

var final_transcript = '';
var recognizing = false;
var ignore_onend;
var start_timestamp;
var recognition = new webkitSpeechRecognition(); // Web Speech API, still prefixed in Chrome
recognition.continuous = true;      // keep listening across pauses instead of stopping after one phrase
recognition.interimResults = true;  // report partial transcripts while the user is still speaking

recognition.onstart = function() {
  recognizing = true;
  console.log("on start");
};

recognition.onerror = function(event) {
  if (event.error == 'no-speech') {
    console.log("event error no-speech");
    ignore_onend = true;
  }
  if (event.error == 'audio-capture') {
    console.log("event error audio-capture");
    ignore_onend = true;
  }
  if (event.error == 'not-allowed') {
    if (event.timeStamp - start_timestamp < 100) {
      console.log("event error not allowed");
    } else {
      console.log("event error permission denied");
    }
    ignore_onend = true;
  }
};

recognition.onend = function() {
  recognizing = false;
  if (ignore_onend) {
    return;
  }

  if (!final_transcript) {
    console.log("info start");
    return;
  }

  if (window.getSelection) {
    window.getSelection().removeAllRanges();
    var range = document.createRange();
    range.selectNode(document.getElementById('final_span'));
    window.getSelection().addRange(range);
  }
};

recognition.onresult = function(event) {
  var interim_transcript = '';
  var last_command = '';

  if (typeof(event.results) == 'undefined') {
    recognition.onend = null;
    recognition.stop();
    return;
  }
  for (var i = event.resultIndex; i < event.results.length; ++i) {
    if (event.results[i].isFinal) {
      final_transcript += event.results[i][0].transcript;
    } else {
      interim_transcript += event.results[i][0].transcript;
      last_command = event.results[i][0].transcript;
    }
  }

  final_transcript = capitalize(final_transcript);
  final_span.innerHTML = linebreak(final_transcript);
  interim_span.innerHTML = linebreak(interim_transcript);

  console.log("final: ", final_transcript);
  console.log("interim: ", interim_transcript);
  console.log("last command: ", last_command);

  if (final_transcript || interim_transcript) {
    if(last_command.match(/facebook/i)) {
      alert('You just said Facebook. Dude. No.');
    }
    if(last_command.match(/hack/i)) {
      alert("Woah! Hackathon!.");
    }
    if(last_command.match(/what is my name/i)) {
      var names = ["Wil Hung", "Philip McKrack","Jack Mehoff","Phil Atio","Hugh Jareckson","Hugh Janus","Ijaz Fhated","Dawn Keibals","Dixie Normous","Emersom Biggins","Craven Moorhead","Anita Lay","Mike Hunt","Aster Rhoids","Willie Hardigan","Suq Madiq"];
      // pick from the whole list (the original hard-coded 11 and never reached the last five names)
      alert(names[Math.floor(Math.random() * names.length)]);
    }
  }
};


var two_line = /\n\n/g;
var one_line = /\n/g;
function linebreak(s) {
  return s.replace(two_line, '<p></p>').replace(one_line, '<br>');
}

var first_char = /\S/;
function capitalize(s) {
  return s.replace(first_char, function(m) { return m.toUpperCase(); });
}

$('#recognition-start').on('click', function(event) { // take the event instead of relying on window.event
  if (recognizing) {
    recognition.stop();
    return;
  }
  final_transcript = '';
  recognition.lang = 'en-US'; // lang expects a BCP 47 language tag, not a number
  recognition.start();
  ignore_onend = false;
  start_timestamp = event.timeStamp;
  $("#recognition-start").addClass("disabled");
  $("#recognition-stop").removeClass("disabled");
});
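
// The markup includes a recognition stop button that the original script never wires up;
// a minimal handler mirroring the mic buttons could look like this:
$('#recognition-stop').on('click', function() {
  if (recognizing) {
    recognition.stop();
  }
  $("#recognition-stop").addClass("disabled");
  $("#recognition-start").removeClass("disabled");
});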

</script>
</body>
</html>
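
The page targets prefixed, circa-2013 Chrome APIs (webkitAudioContext, createJavaScriptNode, webkitGetUserMedia). On a current browser the same capture-and-analyse setup can be built from the standard equivalents; the sketch below is a minimal adaptation under that assumption, not a drop-in replacement for the page above. Note that getUserMedia now requires a secure context (https or localhost).

// minimal modern equivalent of the capture + analyser setup
const context = new AudioContext();
const analyser = context.createAnalyser();
analyser.fftSize = 512;

navigator.mediaDevices.getUserMedia({ audio: true }).then(function (stream) {
  context.createMediaStreamSource(stream).connect(analyser);
  const bins = new Uint8Array(analyser.frequencyBinCount);
  (function poll() {
    analyser.getByteFrequencyData(bins);  // frequency-domain data, as in drawSpectrogram
    // ...drawing would go here...
    requestAnimationFrame(poll);          // replaces the deprecated ScriptProcessorNode callback
  })();
}).catch(console.error);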
