Implementing real-time H.264/H.265 video stream decoding and face tracking

Posted by 从小就很悬


What follows is shared from my own hands-on project experience.

For real-time video stream playback, most projects reach for flv.js or video.js. Neither of them met our actual requirements, and neither can decode an H.265 stream, so together with the backend C++ developers we built our own stream player. Video decoding is done with libffmpeg (feel free to message me if you cannot find the resources). The idea is to write C++ code that uses FFmpeg for decoding and compile it to WebAssembly (wasm). Because decoding is computationally heavy, doing it in the browser places some demands on both the machine and the bandwidth.

Playback itself is implemented in two ways, selectable through configuration: one plays a MediaSource stream through a video element, the other draws the decoded frames with WebGL. MediaSource can play media segments, so the player does not have to wait for the whole video to download before starting; we simply keep feeding stream segments to it.
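To give a sense of how a caller chooses between the two modes, here is a rough usage sketch based on the HSPlayer options listed later in this post; the WebSocket URL and the MIME string are placeholders, not values from the project:

const canUseMse = HSPlayer.isSupported('video/mp4; codecs="hev1.1.6.L120.90"'); // placeholder MIME

const player = new HSPlayer({
  node: document.querySelector('video'),
  wsUrl: 'wss://example.com/stream',      // placeholder signaling / stream address
  type: 'live',                           // 'live' or 'playback'
  renderType: canUseMse ? null : 'webgl', // null = MediaSource mode, 'webgl' = local wasm decoding
});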


Define the WebSocket connection class, which manages signaling and data exchange and dispatches events.

export default class WSReader extends Event {

  constructor(url) {
    super('WSReader');
    this.TAG = '[WSReader]';
    this.ws = new WebSocket(url);
    this.ws.binaryType = 'arraybuffer';
    this.ws.onopen = this._onWebSocketOpen.bind(this);
    this.ws.onerror = this._onWebSocketError.bind(this);
    this.ws.onmessage = this._onWebSocketMessage.bind(this);
    this.ws.onclose = this._onWebSocketClose.bind(this);

    this.wsMethods = {
      open: 'open', // request the MIME type
      play: 'play', // ask the server to start pushing the stream
      pause: 'pause', // stop pushing the stream
      close: 'close', // close the ws connection
      slow: 'slow', // ask the server to lower the push rate
      fast: 'fast', // ask the server to raise the push rate
      complete: 'complete', // the whole video stream has been sent
    };
    this.seq = 1;
    this.sendRate = 1; // stream push rate, used to adjust playback speed
    this.isFullPause = false;
  }

  // handle websocket messages
  _onWebSocketMessage(ev) {
    let data = ev.data;
    if (data instanceof ArrayBuffer) {
      this.dispatch(VideoEvents.VIDEO_EVENT, data);
    } else {
      this.wsMessageHandle(data);
    }
  }

  // dispatch events according to the ws message body
  wsMessageHandle(data) {
    let mes = JSON.parse(data);
    switch (mes.method) {
      case 'open':
        debug.log(`get mime ${mes.mime}`);
        this.openHandle(mes);
        break;
      case 'play':
        debug.log(`ws play signal`);
        this.playHandle(mes);
        break;
      case 'pause':
        debug.log(`ws pause signal`);
        this.pauseHandle(mes);
        break;
      case 'close':
        debug.log(`ws close signal`);
        this.closeHandle();
        break;
      case 'slow':
        debug.log(`ws slow signal`);
        this.speedHandle(mes);
        break;
      case 'fast':
        debug.log(`ws fast signal`);
        this.speedHandle(mes);
        break;
      case 'complete':
        debug.log(`ws complete signal`);
        this.completeHandle();
        break;
      case 'mediaChange':
        debug.log(`ws mediaChange signal`);
        this.mediaChangeHandle(mes);
        break;
    }
  }

  // the MIME type has been received
  _onWsGetMime(data) {
    if (data.ret == 0) {
      this.videoInfo.mime = data.mime;
      if (this.options.renderType === 'webgl') {
        // decide between h264 and h265 from the MIME string and initialize the decoder
        this.decoder.init(data.mime.indexOf('avc') !== -1 ? 0 : 1);
      }
      this.createBuffer();
      // MIME received successfully, now send the play command
      this.videoReader.play();
    } else {
      debug.log(this.TAG, `get mime type failed`);
      this.dispatch(HSPlayer.Events.SERVER_PLAY_ERR, { msg: 'get mime type failed' });
    }
  }
}

Define the public-facing base class. It covers player initialization, basic option configuration (speed list, buffer size, playback type, etc.), player state monitoring and error dispatching, and processing of the video stream data.

export default class HSPlayer extends Event {

  // event types exposed to callers
  static get Events() {
    return {
      // request to play the video failed
      SERVER_PLAY_ERR: 'SERVER_PLAY_ERR',
      // request to pause the video failed
      SERVER_PAUSE_ERR: 'SERVER_PAUSE_ERR',
      // request to change playback speed failed
      SERVER_SPEED_ERR: 'SERVER_SPEED_ERR',
      // error on the server connection
      SERVER_NET_ERR: 'SERVER_NET_ERR',
      // connection dropped because of a network problem
      ABNORMAL_DISCONNECT: 'ABNORMAL_DISCONNECT',
      // the browser does not support the current video format
      CHROME_CODEC_UNSUPPORT: 'CHROME_CODEC_UNSUPPORT',
      // the video stream has missing or corrupted data
      VIDEO_STREAM_INCORRECT: 'VIDEO_STREAM_INCORRECT',
      // the live stream buffer has grown too long, need to seek to the latest point
      VIDEO_LIVE_STREAM_TOO_LOOG: 'VIDEO_LIVE_STREAM_TOO_LOOG',
      // notify the front end that playback succeeded
      VIDEO_PLAY_SUCESS: 'VIDEO_PLAY_SUCESS',
    };
  }

  static isSupported(mimeCode) {
    return (window.MediaSource && window.MediaSource.isTypeSupported(mimeCode));
  }

  constructor(options) {
    super('HSPlayer');
    this.TAG = '[HSPlayer]';
    let defaults = {
      node: '', // the video element
      cacheBufferTime: 60, // max cached duration for playback, in seconds
      cacheBufferMinTime: 30, // when the playback cache drops below this, fetch the stream again
      cleanOffset: 0.8, // duration left over after cleaning the buffer, in seconds
      debug: false, // whether to print console messages
      delayPlay: 0, // optional delayed start for live streams, in ms
      type: 'live', // 'live' for live streaming, 'playback' for VOD
      wsUrl: null, // websocket address; the project currently uses the same address for signaling and the video stream
      flushTime: 3 * 1000, // interval for flushing the buffer, used for live streams
      drawArInfo: false, // whether to draw AR info
      renderType: null, // when set to 'webgl', decode locally
    };
    // ... rest of the constructor omitted
  }

  // _onWsVideoBuffer
  /*
    int8_t version;
    int16_t headLen;
    int8_t frameNum;
    int8_t type;
    int8_t codec;

    int32_t beginTimeStampSec;
    int32_t beginTimeStampMs;

    int32_t EndTimeStampSec;
    int32_t EndTimeStampMs;
  */
  _onWsVideoBuffer(originData) {
    // check whether the frame carries a header
    let headMagic = new Uint8Array(originData.slice(0, 4));
    // read the header length
    let hAr, hLen = 0;
    if (
      headMagic[0] == 117 && // 'u'
      headMagic[1] == 109 && // 'm'
      headMagic[2] == 120 && // 'x'
      headMagic[3] == 115    // 's'
    ) {
      hAr = new Uint8Array(originData.slice(5, 8));
      hLen = (hAr[0] << 8) + hAr[1];
      if (!this.firstFrameTime && originData) {
        let hBuffer = new Uint8Array(originData.slice(0, hLen));
        // the first 6 bytes are version, headLen, type, etc.
        // the last 8 bytes are the frame end time, unused for now
        let sec = (hBuffer[10] << 24) +
          (hBuffer[11] << 16) +
          (hBuffer[12] << 8) +
          hBuffer[13];
        let ms = (hBuffer[14] << 24) +
          (hBuffer[15] << 16) +
          (hBuffer[16] << 8) +
          hBuffer[17];
        this.firstFrameTime = sec * 1000 + ms;
      }
    }
    let data = originData.slice(hLen);

    if (this.options.renderType === 'webgl') {
      // AR data
      let arLenBuf = new Uint8Array(originData.slice(27, 29));
      let arLen = (arLenBuf[0] << 8) + arLenBuf[1];
      if (arLen) {
        let arTarget = new Uint8Array(originData.slice(29, arLen + 29));
        let arJson = '';
        arTarget.forEach(x => {
          arJson += String.fromCharCode(x);
        });
        let targetObj = JSON.parse(arJson);
        if (targetObj.arInfo && targetObj.arInfo.objList) {
          this.decoder.feed(data, targetObj.arInfo.objList);
        } else {
          this.decoder.feed(data, null);
        }
      } else {
        this.decoder.feed(data, null);
      }
      return false;
    }
    // for playback, buffers that cannot be fed yet go into pendingBufs; for live they are simply dropped
    if (this.options.type != 'live') {
      while (this.pendingBufs.length > 0 && this.bufferController) {
        let buf = this.pendingBufs.shift();
        this.bufferController.feed(buf);
      }
      if (this.bufferController) {
        this.bufferController.feed(data);
      } else {
        this.pendingBufs.push(data);
      }
    } else {
      if (this.bufferController) {
        this.bufferController.feed(data);
      }
    }
  }
}
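For readers following the byte offsets above, the same big-endian header could equivalently be parsed with a DataView. This is only an illustrative helper derived from the struct comment (and it assumes the 4-byte "umxs" magic precedes those fields, as the manual parse implies); it is not part of the player:

function parseFrameHeader(arrayBuffer) {
  const view = new DataView(arrayBuffer);
  // bytes 0-3: magic "umxs"
  const magicOk =
    view.getUint8(0) === 0x75 && view.getUint8(1) === 0x6d &&
    view.getUint8(2) === 0x78 && view.getUint8(3) === 0x73;
  if (!magicOk) return null;
  return {
    version: view.getUint8(4),
    headLen: view.getUint16(5),           // DataView reads big-endian by default
    frameNum: view.getUint8(7),
    type: view.getUint8(8),
    codec: view.getUint8(9),
    beginTimeStampSec: view.getUint32(10),
    beginTimeStampMs: view.getUint32(14),
    endTimeStampSec: view.getUint32(18),
    endTimeStampMs: view.getUint32(22),
  };
}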
      
    
  

Define the H.264/H.265 decoder class. In webgl mode the front end is responsible for decoding and drawing while the lower layer only pushes the raw stream; in mediaSource mode the C++ side processes the video first and then pushes it over. The Decoder class is therefore needed for the webgl path, and it is mainly responsible for managing the video stream queue and the AR queue, decoding on a timer, and publishing the decoded data.

// module-level references shared by the class methods
// (assumed to be declared at module scope in the original source)
let canvas, arCanvas, glPlayer, videoCallback;

export default class Decoder {
  constructor(node) {
    this.queue = []; // frame queue
    this.arIndex = -1;
    this.arQueue = {}; // AR info queue
    this.LOG_LEVEL_FFMPEG = 2;
    this.LOG_LEVEL_JS = 0;
    this.LOG_LEVEL_WASM = 1;
    this.node = node;
    // canvas for the video picture
    canvas = document.createElement('canvas');
    canvas.width = node.clientWidth;
    canvas.setAttribute(
      'style',
      `width: 100%;height: auto;position: absolute;left: 0;top:0;`
    );
    // canvas for the AR overlay
    arCanvas = document.createElement('canvas');
    arCanvas.width = node.clientWidth;
    arCanvas.setAttribute(
      'style',
      `width: 100%;height: auto;position: absolute;left: 0;top:0;`
    );
    node.parentNode.appendChild(canvas);
    node.parentNode.appendChild(arCanvas);
  }

  feed(buffer, ar) {
    this.arIndex++;
    this.queue.push(buffer);
    if (ar) {
      this.arQueue[this.arIndex] = ar;
    }
  }

  init(decoderType) {
    // Module / HEAPU8 come from the Emscripten-generated ffmpeg wasm glue
    videoCallback = Module.addFunction((addr_y, addr_u, addr_v, stride_y, stride_u, stride_v, width, height, pts) => {
      // console.log("[%d]In video callback, size = %d * %d, pts = %d", ++videoSize, width, height, pts)
      let size = width * height + (width / 2) * (height / 2) + (width / 2) * (height / 2);
      let data = new Uint8Array(size);
      let pos = 0;
      // copy the Y plane row by row (the stride can be wider than the visible width)
      for (let i = 0; i < height; i++) {
        let src = addr_y + i * stride_y;
        let tmp = HEAPU8.subarray(src, src + width);
        tmp = new Uint8Array(tmp);
        data.set(tmp, pos);
        pos += tmp.length;
      }
      // copy the U plane
      for (let i = 0; i < height / 2; i++) {
        let src = addr_u + i * stride_u;
        let tmp = HEAPU8.subarray(src, src + width / 2);
        tmp = new Uint8Array(tmp);
        data.set(tmp, pos);
        pos += tmp.length;
      }
      // copy the V plane
      for (let i = 0; i < height / 2; i++) {
        let src = addr_v + i * stride_v;
        let tmp = HEAPU8.subarray(src, src + width / 2);
        tmp = new Uint8Array(tmp);
        data.set(tmp, pos);
        pos += tmp.length;
      }
      var obj = {
        data: data,
        width,
        height
      };
      this.displayVideoFrame(obj);
      this.displayVideoAr(pts, width, height);
    });
    var ret = Module._openDecoder(decoderType, videoCallback, this.LOG_LEVEL_WASM);
    if (ret == 0) {
      console.log("openDecoder success");
    } else {
      console.error("openDecoder failed with error", ret);
      return;
    }
    var pts = 0;

    // decode on a timer
    setInterval(() => {
      const data = this.queue.shift();
      if (data) {
        const typedArray = new Uint8Array(data);
        const size = typedArray.length;

        var cacheBuffer = Module._malloc(size);
        Module.HEAPU8.set(typedArray, cacheBuffer);

        Module._decodeData(cacheBuffer, size, pts++);
        if (cacheBuffer != null) {
          Module._free(cacheBuffer);
          cacheBuffer = null;
        }
        // if (size < CHUNK_SIZE) {
        //   console.log('Flush frame data');
        //   Module._flushDecoder();
        //   Module._closeDecoder();
        // }
      }
    }, 1);
  }

  displayVideoFrame(obj) {
    var data = new Uint8Array(obj.data);
    var width = obj.width;
    var height = obj.height;
    var yLength = width * height;
    var uvLength = (width / 2) * (height / 2);
    if (!glPlayer) {
      canvas.height = (canvas.width / width) * height;
      arCanvas.height = (canvas.width / width) * height;
      glPlayer = new WebGLPlayer(canvas, {
        preserveDrawingBuffer: false
      }, arCanvas);
    }
    glPlayer.renderFrame(data, width, height, yLength, uvLength);
  }

  displayVideoAr(pts, width, height) {
    if (!glPlayer) return;
    let target = this.arQueue[pts];
    if (target) {
      delete this.arQueue[pts];
      glPlayer.renderAR(target, width, height);
    }
  }
}
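For context, the webgl path gets wired up roughly like this; a sketch only, where videoNode stands in for the player's video element and the buffers come from the WebSocket reader shown earlier:

const decoder = new Decoder(videoNode);           // videoNode is a placeholder element
decoder.init(mime.indexOf('avc') !== -1 ? 0 : 1); // 0 = H.264, 1 = H.265, as in _onWsGetMime
// then, for every binary frame coming off the WebSocket:
decoder.feed(frameBuffer, arObjListOrNull);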
    
  

Finally, the WebGL rendering class. It is mainly responsible for drawing the decoded YUV data and the AR data. A separate Texture class for WebGL was also factored out; it is not listed here since it is just standard WebGL texture handling (a rough sketch follows after the code below).

export default class WebGLPlayer {
  constructor(canvas, options, arCanvas) {
    this.canvas = canvas;
    this.gl = canvas.getContext("webgl") || canvas.getContext("experimental-webgl");
    this.ctx = arCanvas.getContext("2d");
    this.initGL(options);
  }

  initGL(options) {
    if (!this.gl) {
      console.log("[ER] WebGL not supported.");
      return;
    }

    var gl = this.gl;
    gl.pixelStorei(gl.UNPACK_ALIGNMENT, 1);
    var program = gl.createProgram();
    var vertexShaderSource = [
      "attribute highp vec4 aVertexPosition;",
      "attribute vec2 aTextureCoord;",
      "varying highp vec2 vTextureCoord;",
      "void main(void) {",
      "  gl_Position = aVertexPosition;",
      "  vTextureCoord = aTextureCoord;",
      "}"
    ].join("\n");
    var vertexShader = gl.createShader(gl.VERTEX_SHADER);
    gl.shaderSource(vertexShader, vertexShaderSource);
    gl.compileShader(vertexShader);
    var fragmentShaderSource = [
      "precision highp float;",
      "varying lowp vec2 vTextureCoord;",
      "uniform sampler2D YTexture;",
      "uniform sampler2D UTexture;",
      "uniform sampler2D VTexture;",
      "const mat4 YUV2RGB = mat4",
      "(",
      "  1.1643828125, 0, 1.59602734375, -.87078515625,",
      "  1.1643828125, -.39176171875, -.81296875, .52959375,",
      "  1.1643828125, 2.017234375, 0, -1.081390625,",
      "  0, 0, 0, 1",
      ");",
      "void main(void) {",
      "  gl_FragColor = vec4( texture2D(YTexture, vTextureCoord).x, texture2D(UTexture, vTextureCoord).x, texture2D(VTexture, vTextureCoord).x, 1) * YUV2RGB;",
      "}"
    ].join("\n");

    var fragmentShader = gl.createShader(gl.FRAGMENT_SHADER);
    gl.shaderSource(fragmentShader, fragmentShaderSource);
    gl.compileShader(fragmentShader);
    gl.attachShader(program, vertexShader);
    gl.attachShader(program, fragmentShader);
    gl.linkProgram(program);
    gl.useProgram(program);
    if (!gl.getProgramParameter(program, gl.LINK_STATUS)) {
      console.log("[ER] Shader link failed.");
    }
    var vertexPositionAttribute = gl.getAttribLocation(program, "aVertexPosition");
    gl.enableVertexAttribArray(vertexPositionAttribute);
    var textureCoordAttribute = gl.getAttribLocation(program, "aTextureCoord");
    gl.enableVertexAttribArray(textureCoordAttribute);

    var verticesBuffer = gl.createBuffer();
    gl.bindBuffer(gl.ARRAY_BUFFER, verticesBuffer);
    gl.bufferData(gl.ARRAY_BUFFER, new Float32Array([1.0, 1.0, 0.0, -1.0, 1.0, 0.0, 1.0, -1.0, 0.0, -1.0, -1.0, 0.0]), gl.STATIC_DRAW);
    gl.vertexAttribPointer(vertexPositionAttribute, 3, gl.FLOAT, false, 0, 0);
    var texCoordBuffer = gl.createBuffer();
    gl.bindBuffer(gl.ARRAY_BUFFER, texCoordBuffer);
    gl.bufferData(gl.ARRAY_BUFFER, new Float32Array([1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0]), gl.STATIC_DRAW);
    gl.vertexAttribPointer(textureCoordAttribute, 2, gl.FLOAT, false, 0, 0);

    gl.y = new Texture(gl);
    gl.u = new Texture(gl);
    gl.v = new Texture(gl);
    gl.y.bind(0, program, "YTexture");
    gl.u.bind(1, program, "UTexture");
    gl.v.bind(2, program, "VTexture");
  }

  renderFrame(videoFrame, width, height, uOffset, vOffset) {
    if (!this.gl) {
      console.log("[ER] Render frame failed due to WebGL not supported.");
      return;
    }

    var gl = this.gl;
    gl.viewport(0, 0, gl.canvas.width, gl.canvas.height);
    gl.clearColor(0.0, 0.0, 0.0, 0.0);
    gl.clear(gl.COLOR_BUFFER_BIT);

    // clear the AR canvas
    this.ctx.clearRect(0, 0, gl.canvas.width, gl.canvas.height);

    gl.y.fill(width, height, videoFrame.subarray(0, uOffset));
    gl.u.fill(width >> 1, height >> 1, videoFrame.subarray(uOffset, uOffset + vOffset));
    gl.v.fill(width >> 1, height >> 1, videoFrame.subarray(uOffset + vOffset, videoFrame.length));

    gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);
  }

  renderAR(arr, width, height) {
    var gl = this.gl;
    arr.forEach(obj => {
      // map the detection rectangle from video coordinates to canvas coordinates
      const x = (gl.canvas.width / width) * obj.objRect.left;
      const y = (gl.canvas.height / height) * obj.objRect.top;
      const w = (gl.canvas.width / width) * (obj.objRect.right - obj.objRect.left);
      const h = (gl.canvas.height / height) * (obj.objRect.bottom - obj.objRect.top);
      const c = this.ctx;
      // the drawing code is truncated in the original post; drawing the box would be
      // roughly c.strokeRect(x, y, w, h) on the 2D AR context
    });
  }
}
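Since the Texture class is not listed, here is a minimal sketch of what the bind/fill calls above imply. It follows the common single-channel LUMINANCE texture setup for YUV planes and is not necessarily the author's exact implementation:

export class Texture {
  constructor(gl) {
    this.gl = gl;
    this.texture = gl.createTexture();
    gl.bindTexture(gl.TEXTURE_2D, this.texture);
    // YUV planes are not power-of-two sized, so clamp and use linear filtering
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
  }
  // attach this texture to texture unit n and point the shader sampler at it
  bind(n, program, name) {
    const gl = this.gl;
    gl.activeTexture([gl.TEXTURE0, gl.TEXTURE1, gl.TEXTURE2][n]);
    gl.bindTexture(gl.TEXTURE_2D, this.texture);
    gl.uniform1i(gl.getUniformLocation(program, name), n);
  }
  // upload one plane (Y, U or V) as a single-channel texture
  fill(width, height, data) {
    const gl = this.gl;
    gl.bindTexture(gl.TEXTURE_2D, this.texture);
    gl.texImage2D(gl.TEXTURE_2D, 0, gl.LUMINANCE, width, height, 0, gl.LUMINANCE, gl.UNSIGNED_BYTE, data);
  }
}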

Details to watch out for

  1. For playback (VOD), segments that have not been played yet need to be kept in a queue; for live streams they are simply discarded and we seek to the latest point.

  2. Remember to set the MediaSource sourceBuffer.mode to 'sequence'. With this setting the video plays the buffers in the order they were appended, not according to their timestamps (see the sketch below).
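
For note 2, the relevant MediaSource setup looks roughly like this; video and mime are placeholders, and in the real player the MIME string arrives through the open signal over WebSocket:

const mediaSource = new MediaSource();
video.src = URL.createObjectURL(mediaSource);
mediaSource.addEventListener('sourceopen', () => {
  const sourceBuffer = mediaSource.addSourceBuffer(mime);
  sourceBuffer.mode = 'sequence'; // play segments in append order, ignoring their internal timestamps
  // sourceBuffer.appendBuffer(segment) is then called for every incoming stream segment
});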
