h264,265实时视频流解码及人脸追踪的实现
Posted 从小就很悬
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了h264,265实时视频流解码及人脸追踪的实现相关的知识,希望对你有一定的参考价值。
以下为本人实际工作中经验所得分享,
日常项目中涉及到实时视频流播放,大都会选择flvJs或者videoJs。而由于这两款无法满足实际需求,并且无法解码h265视频
流,所以在后端C++的配合下,一起写了一套自用的视频流播放器。视频解码使用的是libffmpeg,找不到资源的可以私信我,
原理就是用C++调用ffmpeg进行视频解码,并编译成wasm在浏览器中运行。由于解码动作在浏览器端完成,对电脑性能及带宽都有一定要求。
视频播放我们采用了两种方式来实现,可以通过配置设置,一种是使用video播放mediaSource的流媒体,一种是使用webgl绘
制画面。mediaSource支持播放流媒体片段,这样播放器无需等待所有视频资源全都下载完再播放,可以不断的向播放器喂视
频流片段。
定义WebSocket连接类,管理信令及数据交互,分发事件
/**
 * WebSocket connection wrapper: owns the socket, separates binary video
 * frames from JSON signalling messages, and routes each signalling message to
 * its handler.
 *
 * NOTE(review): the socket lifecycle callbacks (`_onWebSocketOpen`,
 * `_onWebSocketError`, `_onWebSocketClose`), the `*Handle` methods and
 * `dispatch` are referenced here but are not part of this listing; they are
 * presumably defined on this class or on the `Event` base — confirm.
 */
export default class WSReader extends Event {
  /**
   * @param {string} url - WebSocket endpoint passed to `new WebSocket(url)`.
   */
  constructor(url) {
    super('WSReader');
    this.TAG = '[WSReader]';
    this.ws = new WebSocket(url);
    // Binary frames must arrive as ArrayBuffer so _onWebSocketMessage can
    // tell them apart from text signalling frames.
    this.ws.binaryType = 'arraybuffer';
    this.ws.onopen = this._onWebSocketOpen.bind(this);
    this.ws.onerror = this._onWebSocketError.bind(this);
    this.ws.onmessage = this._onWebSocketMessage.bind(this);
    this.ws.onclose = this._onWebSocketClose.bind(this);
    this.wsMethods = {
      open: 'open', // request the MIME type
      play: 'play', // request the server to start pushing the stream
      pause: 'pause', // stop pushing the stream
      close: 'close', // close the ws connection
      slow: 'slow', // request a lower push rate
      fast: 'fast', // request a higher push rate
      complete: 'complete', // the whole video stream has been sent
    };
    this.seq = 1;
    this.sendRate = 1; // stream transfer rate, used to adjust playback speed
    this.isFullPause = false;
  }

  /**
   * Socket `message` callback.
   *
   * @param {MessageEvent} ev - event whose `data` is either an ArrayBuffer
   *   (video payload) or a JSON string (signalling).
   */
  _onWebSocketMessage(ev) {
    // The payload lives on ev.data; the event object itself is never an
    // ArrayBuffer.
    const { data } = ev;
    if (data instanceof ArrayBuffer) {
      this.dispatch(VideoEvents.VIDEO_EVENT, data);
    } else {
      this.wsMessageHandle(data);
    }
  }

  /**
   * Parses a signalling message and calls the handler selected by its
   * `method` field. Unknown methods are ignored.
   *
   * @param {string} data - JSON text of the signalling message.
   */
  wsMessageHandle(data) {
    let mes;
    try {
      mes = JSON.parse(data);
    } catch (err) {
      // A malformed frame must not throw out of the socket callback.
      debug.log(this.TAG, `invalid signalling message: ${err.message}`);
      return;
    }
    if (!mes) {
      return;
    }
    switch (mes.method) {
      case 'open':
        debug.log(`get mime ${mes.mime}`);
        this.openHandle(mes);
        break;
      case 'play':
        debug.log(`ws play signal`);
        this.playHandle(mes);
        break;
      case 'pause':
        debug.log(`ws pause signal`);
        this.pauseHandle(mes);
        break;
      case 'close':
        debug.log(`ws close signal`);
        this.closeHandle();
        break;
      case 'slow':
        debug.log(`ws slow signal`);
        this.speedHandle(mes);
        break;
      case 'fast':
        debug.log(`ws fast signal`);
        this.speedHandle(mes);
        break;
      case 'complete':
        debug.log(`ws complete signal`);
        this.completeHandle();
        break;
      case 'mediaChange':
        debug.log(`ws mediaChange signal`);
        this.mediaChangeHandle(mes);
        break;
      default:
        break;
    }
  }
}
// Called with the server's reply to the MIME request (`data.ret`, `data.mime`).
// On success (ret == 0) it stores the MIME on this.videoInfo, initialises the
// local decoder when rendering with WebGL, and then asks the reader to start
// playing; otherwise it logs and dispatches SERVER_PLAY_ERR.
// NOTE(review): braces and string quotes were lost from this listing, so it is
// not clear whether `this.createBuffer()` sits inside the webgl branch or runs
// unconditionally — confirm against the real source before restructuring.
_onWsGetMime(data)
if (data.ret == 0)
this.videoInfo.mime = data.mime;
if (this.options.renderType === webgl)
// MIME received: use it to tell H.264 from H.265 and initialise the decoder
// (a MIME containing "avc" selects decoder type 0, anything else type 1).
this.decoder.init(data.mime.indexOf(avc) !== -1 ? 0 : 1);
this.createBuffer();
// MIME obtained successfully; next, send the play command.
this.videoReader.play();
else
debug.log(this.TAG, `get mime type failed`);
this.dispatch(HSPlayer.Events.SERVER_PLAY_ERR, msg: get mime type failed)
定义对外基础类,包含播放器初始化,基础属性配置(倍速列表,缓冲区大小,播放类型等),播放器状态监听及异常分发。处理视频流数据
/**
 * Public player class: exposes the event names callers can listen for, the
 * default configuration, and the handler that routes incoming video packets
 * either to the local (WebGL) decoder or to the MediaSource buffer controller.
 */
export default class HSPlayer extends Event {
  /**
   * Event types exposed to callers. Each value equals its key; the existing
   * spellings are part of the public interface and are kept as-is.
   */
  static get Events() {
    return {
      // The request to play the video failed
      SERVER_PLAY_ERR: 'SERVER_PLAY_ERR',
      // The request to pause the video failed
      SERVER_PAUSE_ERR: 'SERVER_PAUSE_ERR',
      // The request to change speed failed
      SERVER_SPEED_ERR: 'SERVER_SPEED_ERR',
      // The connection to the server reported an error
      SERVER_NET_ERR: 'SERVER_NET_ERR',
      // The connection was interrupted by a network problem
      ABNORMAL_DISCONNECT: 'ABNORMAL_DISCONNECT',
      // The browser does not support the format of the current video
      CHROME_CODEC_UNSUPPORT: 'CHROME_CODEC_UNSUPPORT',
      // The video is incomplete or corrupted
      VIDEO_STREAM_INCORRECT: 'VIDEO_STREAM_INCORRECT',
      // The live buffer grew too long; a seek to the newest point is needed
      VIDEO_LIVE_STREAM_TOO_LOOG: 'VIDEO_LIVE_STREAM_TOO_LOOG',
      // Tells the front end that playback started successfully
      VIDEO_PLAY_SUCESS: 'VIDEO_PLAY_SUCESS',
    };
  }

  /**
   * @param {string} mimeCode - MIME/codec string to probe.
   * @returns {boolean} true only when MediaSource exists and reports the type
   *   as supported; false otherwise (including when `window` is unavailable).
   */
  static isSupported(mimeCode) {
    const mediaSource = typeof window !== 'undefined' ? window.MediaSource : undefined;
    return Boolean(mediaSource && mediaSource.isTypeSupported(mimeCode));
  }

  /**
   * @param {object} options - caller overrides for the defaults listed below.
   */
  constructor(options) {
    super('HSPlayer');
    this.TAG = '[HSPlayer]';
    const defaults = {
      node: '', // video node
      cacheBufferTime: 60, // maximum cache duration for playback, in seconds
      cacheBufferMinTime: 30, // when the playback cache falls below this, fetch the stream again
      cleanOffset: 0.8, // duration left over when the buffer is cleaned, in seconds
      debug: false, // whether to print console output
      delayPlay: 0, // optional playback delay for live streams, in ms
      type: 'live', // 'live' for live streaming, 'playback' for recorded playback
      wsUrl: null, // websocket URL; signalling and video currently share one address
      flushTime: 3 * 1000, // interval for emptying the buffer, used for live streams
      drawArInfo: false, // whether AR info should be drawn
      renderType: null, // 'webgl' means frames are decoded locally
    };
    // NOTE(review): the listing stops here. The rest of the constructor
    // (presumably merging `defaults` with `options` into `this.options`, which
    // _onWsVideoBuffer reads) is not shown — restore it from the real source.
  }

  /*
   * Packet header layout as documented by the original author:
   *   int8_t  version;
   *   int16_t headLen;
   *   int8_t  frameNum;
   *   int8_t  type;
   *   int8_t  codec;
   *   int32_t beginTimeStampSec;
   *   int32_t beginTimeStampMs;
   *   int32_t EndTimeStampSec;
   *   int32_t EndTimeStampMs;
   * The code below expects a 4-byte magic (117,109,120,115 = "umxs") first,
   * reads headLen from bytes 5-6 and the begin timestamp from bytes 10-17,
   * all big-endian.
   */
  /**
   * Handles one binary packet from the socket.
   *
   * @param {ArrayBuffer} originData - raw packet, optionally prefixed by the
   *   header described above.
   * @returns {boolean|undefined} `false` in WebGL mode (packet handed to the
   *   decoder), otherwise `undefined`.
   */
  _onWsVideoBuffer(originData) {
    // One view over the packet instead of several slice() copies. Reads past
    // the end yield undefined, which the arithmetic below turns into NaN or 0
    // exactly as the sliced arrays did.
    const bytes = new Uint8Array(originData);
    const hasHeader =
      bytes[0] === 117 &&
      bytes[1] === 109 &&
      bytes[2] === 120 &&
      bytes[3] === 115;

    let hLen = 0;
    if (hasHeader) {
      hLen = (bytes[5] << 8) + bytes[6];
      // Record the begin timestamp of the first packet only. The first 6
      // header bytes are version, headLen, type, etc.; the trailing 8 bytes
      // are the frame end time, currently unused. A header shorter than 18
      // bytes cannot hold the begin timestamp, so it is skipped.
      if (!this.firstFrameTime && hLen >= 18) {
        const sec =
          (bytes[10] << 24) +
          (bytes[11] << 16) +
          (bytes[12] << 8) +
          bytes[13];
        const ms =
          (bytes[14] << 24) +
          (bytes[15] << 16) +
          (bytes[16] << 8) +
          bytes[17];
        this.firstFrameTime = sec * 1000 + ms;
      }
    }

    const data = originData.slice(hLen);

    if (this.options.renderType === 'webgl') {
      // Local decoding: hand the payload and any AR objects to the decoder.
      this.decoder.feed(data, this._parseArObjects(bytes));
      return false;
    }

    if (this.options.type !== 'live') {
      // Playback: buffers that could not be fed yet are kept in pendingBufs
      // and flushed, in order, once a buffer controller exists.
      while (this.pendingBufs.length > 0 && this.bufferController) {
        this.bufferController.feed(this.pendingBufs.shift());
      }
      if (this.bufferController) {
        this.bufferController.feed(data);
      } else {
        this.pendingBufs.push(data);
      }
    } else if (this.bufferController) {
      // Live: data that cannot be fed right now is simply dropped.
      this.bufferController.feed(data);
    }
    return undefined;
  }

  /**
   * Extracts the AR object list carried in a packet.
   *
   * A big-endian 16-bit length at bytes 27-28 is followed by that many bytes
   * of JSON starting at byte 29. The JSON is decoded as UTF-8 so non-ASCII
   * text survives (building the string one byte per character would not).
   *
   * @param {Uint8Array} bytes - view over the whole packet.
   * @returns {Array|null} `arInfo.objList` from the JSON, or null when the
   *   packet has no AR data or the JSON has no object list.
   * @throws {SyntaxError} when the AR bytes are not valid JSON.
   */
  _parseArObjects(bytes) {
    const arLen = (bytes[27] << 8) + bytes[28];
    if (!arLen) {
      return null;
    }
    const arJson = new TextDecoder('utf-8').decode(bytes.subarray(29, 29 + arLen));
    const targetObj = JSON.parse(arJson);
    if (targetObj.arInfo && targetObj.arInfo.objList) {
      return targetObj.arInfo.objList;
    }
    return null;
  }
}
定义h264,h265解码类,因为此webgl模式下是由前端负责解码工作并绘制,底层只负责推送裸流,如果使用mediaSource模式则是c++将视频解码之后再推送过来。所以需要使用Decoder类,主要负责视频流队列管理,ar队列管理,定时解码,发布解码之后的数据
/**
 * Local H.264/H.265 decoder front end used in WebGL mode.
 *
 * Queues raw stream chunks and their AR object lists, pushes one chunk per
 * timer tick into the wasm decoder (`Module`), and forwards every decoded
 * YUV frame, plus the AR data recorded for the same sequence number, to a
 * WebGLPlayer.
 *
 * The canvases, the player and the wasm callback are instance fields, so two
 * decoders on one page do not share them.
 */
export default class Decoder {
  /**
   * @param {HTMLElement} node - video node; both canvases are sized from its
   *   clientWidth and appended to its parent.
   */
  constructor(node) {
    this.queue = []; // pending stream chunks
    this.arIndex = -1; // sequence number of the most recently fed chunk
    this.arQueue = {}; // AR object lists keyed by chunk sequence number
    this.LOG_LEVEL_FFMPEG = 2;
    this.LOG_LEVEL_JS = 0;
    this.LOG_LEVEL_WASM = 1;
    this.node = node;
    this.glPlayer = null; // created lazily on the first decoded frame
    this.videoCallback = null; // value returned by Module.addFunction
    this.decodeTimer = null; // handle of the decode interval
    // Canvas for the video picture
    this.canvas = Decoder._createOverlayCanvas(node.clientWidth);
    // Canvas for the AR overlay
    this.arCanvas = Decoder._createOverlayCanvas(node.clientWidth);
    node.parentNode.appendChild(this.canvas);
    node.parentNode.appendChild(this.arCanvas);
  }

  /** Creates an absolutely positioned canvas of the given pixel width. */
  static _createOverlayCanvas(width) {
    const canvas = document.createElement("canvas");
    canvas.width = width;
    canvas.setAttribute(
      "style",
      `width: 100%;height: auto;position: absolute;left: 0;top:0;`
    );
    return canvas;
  }

  /**
   * Copies `rows` rows of `rowBytes` bytes from a strided plane in `heap`
   * into `dest` starting at `destPos`; returns the position after the copy.
   */
  static _copyPlane(heap, dest, destPos, addr, stride, rowBytes, rows) {
    let pos = destPos;
    for (let row = 0; row < rows; row++) {
      const src = addr + row * stride;
      dest.set(heap.subarray(src, src + rowBytes), pos);
      pos += rowBytes;
    }
    return pos;
  }

  /**
   * Queues one chunk for decoding.
   *
   * @param {ArrayBuffer} buffer - raw stream chunk.
   * @param {Array|null} ar - AR objects for this chunk, if any.
   */
  feed(buffer, ar) {
    this.arIndex++;
    this.queue.push(buffer);
    if (ar) {
      this.arQueue[this.arIndex] = ar;
    }
  }

  /**
   * Opens the wasm decoder and starts the 1 ms decode timer.
   *
   * @param {number} decoderType - value passed through to
   *   `Module._openDecoder` (the caller derives it from the MIME type).
   */
  init(decoderType) {
    this.videoCallback = Module.addFunction(
      (addr_y, addr_u, addr_v, stride_y, stride_u, stride_v, width, height, pts) => {
        // Pack the three strided planes into one tight Y|U|V buffer. Chroma
        // planes are half size in each dimension, using the same integer
        // halving as the renderer.
        const chromaWidth = width >> 1;
        const chromaHeight = height >> 1;
        const data = new Uint8Array(width * height + 2 * chromaWidth * chromaHeight);
        const heap = Module.HEAPU8;
        let pos = Decoder._copyPlane(heap, data, 0, addr_y, stride_y, width, height);
        pos = Decoder._copyPlane(heap, data, pos, addr_u, stride_u, chromaWidth, chromaHeight);
        Decoder._copyPlane(heap, data, pos, addr_v, stride_v, chromaWidth, chromaHeight);

        this.displayVideoFrame({ data, width, height });
        this.displayVideoAr(pts, width, height);
      }
    );

    const ret = Module._openDecoder(decoderType, this.videoCallback, this.LOG_LEVEL_WASM);
    if (ret !== 0) {
      console.error("openDecoder failed with error", ret);
      return;
    }
    console.log("openDecoder success");

    // A repeated init() must not leave a second timer draining the queue.
    if (this.decodeTimer != null) {
      clearInterval(this.decodeTimer);
    }

    let pts = 0;
    // Timed decoding: one queued chunk per tick.
    this.decodeTimer = setInterval(() => {
      const chunk = this.queue.shift();
      if (!chunk) {
        return;
      }
      // Consume the sequence number even if this chunk is lost, so later
      // frames stay aligned with the AR entries recorded by feed().
      const framePts = pts++;
      const bytes = new Uint8Array(chunk);
      const size = bytes.length;
      const ptr = Module._malloc(size);
      if (!ptr) {
        // malloc signals failure with 0; writing to it would corrupt the heap.
        console.error("decoder malloc failed for size", size);
        delete this.arQueue[framePts];
        return;
      }
      try {
        Module.HEAPU8.set(bytes, ptr);
        Module._decodeData(ptr, size, framePts);
      } finally {
        Module._free(ptr);
      }
    }, 1);
  }

  /**
   * Renders one decoded frame, creating the WebGL player (and sizing both
   * canvases to the frame's aspect ratio) on the first call.
   *
   * @param {{data: Uint8Array, width: number, height: number}} obj
   */
  displayVideoFrame(obj) {
    const { width, height } = obj;
    const data = obj.data instanceof Uint8Array ? obj.data : new Uint8Array(obj.data);
    const yLength = width * height;
    const uvLength = (width >> 1) * (height >> 1);
    if (!this.glPlayer) {
      const displayHeight = (this.canvas.width / width) * height;
      this.canvas.height = displayHeight;
      this.arCanvas.height = displayHeight;
      this.glPlayer = new WebGLPlayer(
        this.canvas,
        { preserveDrawingBuffer: false },
        this.arCanvas
      );
    }
    this.glPlayer.renderFrame(data, width, height, yLength, uvLength);
  }

  /**
   * Draws the AR objects recorded for sequence number `pts`, if any, and
   * removes them from the queue.
   */
  displayVideoAr(pts, width, height) {
    if (!this.glPlayer) {
      return;
    }
    const target = this.arQueue[pts];
    if (target) {
      delete this.arQueue[pts];
      this.glPlayer.renderAR(target, width, height);
    }
  }
}
最后就是webgl渲染类,主要负责处理解码之后的Yuv数据跟ar数据进行绘制,当然另外再抽离了一个webgl用的texture类,这里就不列出来了。就是标准的webgl纹理处理
/**
 * Draws decoded YUV frames on a WebGL canvas and AR overlays on a separate
 * 2D canvas. Y, U and V planes are uploaded as three textures (via the
 * project's `Texture` class) and converted to RGB in the fragment shader.
 */
export default class WebGLPlayer {
  /**
   * @param {HTMLCanvasElement} canvas - target for the video picture.
   * @param {object} options - forwarded to initGL, which does not read it.
   * @param {HTMLCanvasElement} arCanvas - target for the AR overlay.
   */
  constructor(canvas, options, arCanvas) {
    this.canvas = canvas;
    this.gl = canvas.getContext("webgl") || canvas.getContext("experimental-webgl");
    this.ctx = arCanvas.getContext("2d");
    this.initGL(options);
  }

  /**
   * Compiles one shader and logs the driver's message if compilation fails.
   * The shader is returned either way so linking reports the failure too.
   */
  _compileShader(type, source) {
    const gl = this.gl;
    const shader = gl.createShader(type);
    gl.shaderSource(shader, source);
    gl.compileShader(shader);
    if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)) {
      console.log("[ER] Shader compile failed.", gl.getShaderInfoLog(shader));
    }
    return shader;
  }

  /**
   * Builds the shader program, the full-screen quad and the three plane
   * textures. Does nothing beyond logging when WebGL is unavailable.
   *
   * @param {object} options - currently unused.
   */
  initGL(options) {
    if (!this.gl) {
      console.log("[ER] WebGL not supported.");
      return;
    }
    const gl = this.gl;
    // Plane rows are tightly packed, so rows must not be padded to 4 bytes.
    gl.pixelStorei(gl.UNPACK_ALIGNMENT, 1);
    const program = gl.createProgram();

    // Shader lines are joined with a real newline; joining with the two
    // characters backslash + n is not valid GLSL.
    const vertexShaderSource = [
      "attribute highp vec4 aVertexPosition;",
      "attribute vec2 aTextureCoord;",
      "varying highp vec2 vTextureCoord;",
      "void main(void) {",
      " gl_Position = aVertexPosition;",
      " vTextureCoord = aTextureCoord;",
      "}"
    ].join("\n");
    const vertexShader = this._compileShader(gl.VERTEX_SHADER, vertexShaderSource);

    const fragmentShaderSource = [
      "precision highp float;",
      "varying lowp vec2 vTextureCoord;",
      "uniform sampler2D YTexture;",
      "uniform sampler2D UTexture;",
      "uniform sampler2D VTexture;",
      "const mat4 YUV2RGB = mat4",
      "(",
      " 1.1643828125, 0, 1.59602734375, -.87078515625,",
      " 1.1643828125, -.39176171875, -.81296875, .52959375,",
      " 1.1643828125, 2.017234375, 0, -1.081390625,",
      " 0, 0, 0, 1",
      ");",
      "void main(void) {",
      " gl_FragColor = vec4( texture2D(YTexture, vTextureCoord).x, texture2D(UTexture, vTextureCoord).x, texture2D(VTexture, vTextureCoord).x, 1) * YUV2RGB;",
      "}"
    ].join("\n");
    const fragmentShader = this._compileShader(gl.FRAGMENT_SHADER, fragmentShaderSource);

    gl.attachShader(program, vertexShader);
    gl.attachShader(program, fragmentShader);
    gl.linkProgram(program);
    if (!gl.getProgramParameter(program, gl.LINK_STATUS)) {
      console.log("[ER] Shader link failed.");
    }
    gl.useProgram(program);

    const vertexPositionAttribute = gl.getAttribLocation(program, "aVertexPosition");
    gl.enableVertexAttribArray(vertexPositionAttribute);
    const textureCoordAttribute = gl.getAttribLocation(program, "aTextureCoord");
    gl.enableVertexAttribArray(textureCoordAttribute);

    // Full-screen quad drawn as a 4-vertex triangle strip.
    const verticesBuffer = gl.createBuffer();
    gl.bindBuffer(gl.ARRAY_BUFFER, verticesBuffer);
    gl.bufferData(gl.ARRAY_BUFFER, new Float32Array([1.0, 1.0, 0.0, -1.0, 1.0, 0.0, 1.0, -1.0, 0.0, -1.0, -1.0, 0.0]), gl.STATIC_DRAW);
    gl.vertexAttribPointer(vertexPositionAttribute, 3, gl.FLOAT, false, 0, 0);

    const texCoordBuffer = gl.createBuffer();
    gl.bindBuffer(gl.ARRAY_BUFFER, texCoordBuffer);
    gl.bufferData(gl.ARRAY_BUFFER, new Float32Array([1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0]), gl.STATIC_DRAW);
    gl.vertexAttribPointer(textureCoordAttribute, 2, gl.FLOAT, false, 0, 0);

    gl.y = new Texture(gl);
    gl.u = new Texture(gl);
    gl.v = new Texture(gl);
    gl.y.bind(0, program, "YTexture");
    gl.u.bind(1, program, "UTexture");
    gl.v.bind(2, program, "VTexture");
  }

  /**
   * Uploads one packed Y|U|V frame and draws it; also clears the AR canvas.
   *
   * @param {Uint8Array} videoFrame - Y plane, then U plane, then V plane.
   * @param {number} width - frame width in pixels.
   * @param {number} height - frame height in pixels.
   * @param {number} uOffset - byte offset where the U plane starts (the Y
   *   plane length).
   * @param {number} vOffset - length of the U plane; the V plane starts at
   *   uOffset + vOffset.
   */
  renderFrame(videoFrame, width, height, uOffset, vOffset) {
    if (!this.gl) {
      console.log("[ER] Render frame failed due to WebGL not supported.");
      return;
    }
    const gl = this.gl;
    gl.viewport(0, 0, gl.canvas.width, gl.canvas.height);
    gl.clearColor(0.0, 0.0, 0.0, 0.0);
    gl.clear(gl.COLOR_BUFFER_BIT);
    // Clear the AR canvas
    this.ctx.clearRect(0, 0, gl.canvas.width, gl.canvas.height);

    const vStart = uOffset + vOffset;
    gl.y.fill(width, height, videoFrame.subarray(0, uOffset));
    gl.u.fill(width >> 1, height >> 1, videoFrame.subarray(uOffset, vStart));
    gl.v.fill(width >> 1, height >> 1, videoFrame.subarray(vStart, videoFrame.length));
    gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);
  }

  /**
   * Maps each AR object's rectangle from frame pixels to canvas pixels.
   *
   * @param {Array} arr - AR objects, each with
   *   `objRect.{left, top, right, bottom}` in frame coordinates.
   * @param {number} width - frame width the rectangles refer to.
   * @param {number} height - frame height the rectangles refer to.
   */
  renderAR(arr, width, height) {
    const gl = this.gl;
    if (!gl) {
      // Same precondition as renderFrame: without WebGL there is no canvas
      // size to scale against.
      return;
    }
    const scaleX = gl.canvas.width / width;
    const scaleY = gl.canvas.height / height;
    arr.forEach((obj) => {
      const x = scaleX * obj.objRect.left;
      const y = scaleY * obj.objRect.top;
      const w = scaleX * (obj.objRect.right - obj.objRect.left);
      const h = scaleY * (obj.objRect.bottom - obj.objRect.top);
      const c = this.ctx;
      // NOTE(review): the listing is cut off here. The drawing calls that
      // use x, y, w, h on the 2D context `c` are not shown and must be
      // restored from the real source.
    });
  }
}
注意细节
- 如果是回放则需要把没有播放完的片段保留在队列,直播则直接舍弃并seek到最新的点
- mediaSource的sourceBuffer.mode记得设置为sequence,此配置意味着video将按照buffer队列依次播放,不会根据buffer的时间戳来播放。
以上是关于h264,265实时视频流解码及人脸追踪的实现的主要内容,如果未能解决你的问题,请参考以下文章
技术开发:H.265编码视频在web网页实现无插件播放应该通过软解码还是硬解码?
聊聊视频中的编解码器,你所不知道的h264h265vp8vp9和av1编解码库