egret-基于TireTree(即 Trie 树/字典树)的敏感字过滤

Posted 明立

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了egret-基于TireTree的敏感字过滤相关的知识,希望对你有一定的参考价值。

1.单例基类

/**
 * Base class for lazily created singletons.
 *
 * Subclasses obtain their single instance through `Ins()`; the instance is
 * stored in a static `_instance` slot on the concrete class that `Ins()` was
 * called on.
 */
class BaseSingle {
	public constructor(...args: any[]) {
	}

	/**
	 * Returns the singleton of the class this method is invoked on, creating it
	 * on first use.
	 *
	 * @param args Forwarded to the constructor on the first call only; ignored
	 *             once the instance exists.
	 * @returns The single instance of the calling class.
	 */
	public static Ins(...args: any[]): any {
		const cls: any = this;
		// Static members are inherited through the constructor chain, so a plain
		// `cls._instance` read on a subclass could pick up an ancestor's instance.
		// Only an instance stored directly on this very class counts.
		const hasOwnInstance = Object.prototype.hasOwnProperty.call(cls, "_instance");
		if (!hasOwnInstance || !cls._instance) {
			cls._instance = new cls(...args);
		}
		return cls._instance;
	}
}

2.TireNode节点定义

/**
 * A node of the sensitive-word tree.
 *
 * Each node stands for one character. Its children are kept in two buckets:
 * a child whose character code is greater than this node's goes into
 * `rightNode`, every other child goes into `leftNode`.
 */
class TireNode {
	/** Ids of every word whose path passes through this node. */
	public ids: number[] = [];
	/** Id of the word that ends exactly on this node, or -1 when none does. */
	public endId: number = -1;
	/** The character this node stands for. */
	public char: string;
	/** Children whose character code is not greater than this node's. */
	public leftNode: TireNode[] = [];
	/** Children whose character code is greater than this node's. */
	public rightNode: TireNode[] = [];
	/** True once at least one word ends on this node. */
	public bisEnd: boolean = false;

	public constructor() {
	}

	/**
	 * Stores `text` under `id`, starting at this node.
	 *
	 * The first character of `text` becomes this node's character; the
	 * remainder is pushed down into the child nodes.
	 *
	 * @param id   Identifier of the word being stored.
	 * @param text The not-yet-stored part of the word; an empty string is ignored.
	 */
	public fill(id: any, text: string): void {
		if (!text) {
			// Nothing to store. Without this guard an empty string would reach
			// next() and fail on `undefined.charCodeAt`.
			return;
		}
		if (this.ids.indexOf(id) == -1) {
			this.ids.push(id);
		}
		this.char = text.charAt(0);
		if (text.length == 1) {
			// Sticky on purpose: a longer word passing through later must not
			// clear the flag, otherwise it would disagree with endId.
			this.bisEnd = true;
			this.endId = id;
		} else {
			this.next(id, text.slice(1));
		}
	}

	/**
	 * Returns the ids of all stored words that begin with `text`.
	 *
	 * The first character of `text` is taken to be this node's character and is
	 * not re-checked.
	 *
	 * @param text Prefix to look up, beginning at this node.
	 * @returns The `ids` array of the node reached by `text` (the live array,
	 *          not a copy), or an empty array when the path does not exist.
	 */
	public findExitArray(text: string): number[] {
		const node = this.locate(text);
		return node ? node.ids : [];
	}

	/**
	 * Returns the id of the stored word that is exactly `text`.
	 *
	 * The first character of `text` is taken to be this node's character and is
	 * not re-checked.
	 *
	 * @param text Word to look up, beginning at this node.
	 * @returns The `endId` of the node reached by `text`, or -1 when the path
	 *          does not exist or no word ends there.
	 */
	public findEnd(text: string): number {
		const node = this.locate(text);
		return node ? node.endId : -1;
	}

	/** Walks from this node along text[1..] and returns the node reached, or null. */
	private locate(text: string): TireNode | null {
		if (!text) {
			return null;
		}
		let node: TireNode = this;
		for (let i = 1; i < text.length; i++) {
			const ch = text.charAt(i);
			const bucket = node.bucketFor(ch);
			const ind = node.bIsExit(bucket, ch);
			if (ind == -1) {
				return null;
			}
			node = bucket[ind];
		}
		return node;
	}

	/** Picks the child bucket a node for `ch` belongs to. */
	private bucketFor(ch: string): TireNode[] {
		return ch.charCodeAt(0) > this.char.charCodeAt(0) ? this.rightNode : this.leftNode;
	}

	/** Stores the remaining `text` in the matching child, creating the child if needed. */
	private next(id: any, text: string): void {
		const nextCode = text.charAt(0);
		const target = this.bucketFor(nextCode);
		const ind = this.bIsExit(target, nextCode);
		if (ind != -1) {
			target[ind].fill(id, text);
		} else {
			const node = new TireNode();
			node.fill(id, text);
			target.push(node);
		}
	}

	/** Returns the index in `arr` of the node whose character is `char`, or -1. */
	private bIsExit(arr: TireNode[], char: string): number {
		for (let i = 0; i < arr.length; i++) {
			if (arr[i].char == char) {
				return i;
			}
		}
		return -1;
	}
}

3.TireTree的定义

/**
 * Singleton word tree: a list of root nodes, one per distinct first character
 * of the stored words.
 */
class TireTree extends BaseSingle {
	/** Root nodes, one per distinct first character. */
	public threeNode: TireNode[] = [];

	public constructor() {
		super();
	}

	/** Returns the shared TireTree instance. */
	public static Ins(): TireTree {
		return super.Ins();
	}

	/**
	 * Stores a word in the tree.
	 *
	 * @param id    Identifier of the word.
	 * @param text  The word; empty or non-string values are skipped.
	 * @param bword When true (the default) the word is upper-cased before it is
	 *              stored.
	 */
	public fill(id: any, text: string, bword: boolean = true): void {
		if (typeof text != "string" || text.length == 0) {
			// An empty or malformed entry would otherwise throw inside the node.
			return;
		}
		if (bword) {
			text = text.toUpperCase();
		}
		const ind = this.bIsExit(text.charAt(0));
		if (ind != -1) {
			this.threeNode[ind].fill(id, text);
		} else {
			const node = new TireNode();
			node.fill(id, text);
			this.threeNode.push(node);
		}
	}

	/**
	 * Looks up `text` as a complete word.
	 *
	 * @param text  Word to look up.
	 * @param bword When true (the default) `text` is upper-cased before lookup.
	 * @returns Whatever the matching root node's `findEnd` reports for `text`,
	 *          or -1 when `text` is empty or no root node matches its first
	 *          character.
	 */
	public findEnd(text: string, bword: boolean = true): number {
		if (!text) {
			return -1;
		}
		if (bword) {
			text = text.toUpperCase();
		}
		const ind = this.bIsExit(text.charAt(0));
		return ind != -1 ? this.threeNode[ind].findEnd(text) : -1;
	}

	/**
	 * Looks up `text` as a prefix.
	 *
	 * @param text  Prefix to look up.
	 * @param bword When true (the default) `text` is upper-cased before lookup.
	 * @returns Whatever the matching root node's `findExitArray` reports for
	 *          `text`, or an empty array when `text` is empty or no root node
	 *          matches its first character.
	 */
	public findExitArray(text: string, bword: boolean = true): number[] {
		if (!text) {
			return [];
		}
		if (bword) {
			text = text.toUpperCase();
		}
		const ind = this.bIsExit(text.charAt(0));
		return ind != -1 ? this.threeNode[ind].findExitArray(text) : [];
	}

	/** Returns the index of the root node whose character is `char`, or -1. */
	private bIsExit(char: string): number {
		for (let i = 0; i < this.threeNode.length; i++) {
			if (this.threeNode[i].char == char) {
				return i;
			}
		}
		return -1;
	}
}


4.使用:管理类的定义

/**
 * Singleton front end of the sensitive-word filter: loads the word list,
 * feeds it into TireTree and masks matches in user text.
 */
class TireMgr extends BaseSingle {
	/** Sensitive-word table read from the 'maskwords_json' resource. */
	public masks: any;
	/** The text most recently passed to getMask. */
	public checkText: string;
	/** Characters that are skipped while a match is being extended. */
	public splitChar = [" ", "_", "*", "^", "%", "$", "@"];

	public constructor() {
		super();
		this.masks = RES.getRes('maskwords_json');
	}

	/** Returns the shared TireMgr instance. */
	public static Ins(): TireMgr {
		return super.Ins();
	}

	/** Feeds every entry of `masks` (key as id, value as word) into TireTree. */
	public init() {
		for (let key in this.masks) {
			TireTree.Ins().fill(key, this.masks[key]);
		}
	}

	/**
	 * Returns `text` with every sensitive word replaced by '*' characters.
	 *
	 * At each position the longest word starting there is masked, one '*' per
	 * original character; characters from `splitChar` that sit inside a match
	 * are masked along with it. A character that starts no complete word is
	 * copied through unchanged and scanning resumes at the very next character,
	 * so no input is dropped and words that start inside a failed partial match
	 * are still found.
	 *
	 * @param text Text to filter; null/undefined is treated as an empty string.
	 * @returns The masked text.
	 */
	public getMask(text): string {
		console.log("strat", egret.getTimer());
		this.checkText = text == null ? "" : String(text);
		const source = this.checkText;
		const tree = TireTree.Ins();
		let cache = "";
		let i = 0;
		while (i < source.length) {
			const endIndex = this.longestMatchEnd(tree, source, i);
			if (endIndex == -1) {
				// No word starts here: keep the character and move on by one.
				cache += source[i];
				i++;
			} else {
				for (let ind = i; ind <= endIndex; ind++) {
					cache += "*";
				}
				i = endIndex + 1;
			}
		}
		console.log("end", egret.getTimer());
		return cache;
	}

	/**
	 * Returns the index of the last character of the longest word that starts
	 * at `start`, or -1 when no word starts there.
	 */
	private longestMatchEnd(tree: TireTree, source: string, start: number): number {
		const first = source[start];
		if (this.splitChar.indexOf(first) != -1 || tree.findExitArray(first).length == 0) {
			return -1;
		}
		let endIndex = tree.findEnd(first) != -1 ? start : -1;
		let candidate = first;
		for (let j = start + 1; j < source.length; j++) {
			const ch = source[j];
			if (this.splitChar.indexOf(ch) != -1) {
				// Ignored characters do not take part in the lookup.
				continue;
			}
			candidate += ch;
			if (tree.findExitArray(candidate).length == 0) {
				// No stored word continues this way; stop extending.
				break;
			}
			if (tree.findEnd(candidate) != -1) {
				endIndex = j;
			}
		}
		return endIndex;
	}
}


5.敏感字库格式参考:

{
	"1":"测试",
	"2":"敏感字",
	"3":"测试下敏感字",
	"4":"敏",
	"5":"感"
}

说明

1.已经通过简单的敏感字测试,复杂条件下的有效性待确定
2.可用作其他搜索匹配方案,但注意资源格式,必须保留id作为键值
3.注意使用时要调用 TireMgr.Ins().init()进行初始化一次
4.如有疑问或建议,欢迎留言探讨

以上是关于egret-基于TireTree的敏感字过滤的主要内容,如果未能解决你的问题,请参考以下文章

前端实现敏感字过滤

练习 过滤用户输入的敏感字

使用动态代理对象进行敏感字过滤

过滤敏感字DFA JAVA实现

Egret IDE中搜索,过滤文件,只搜索.ts

java实现敏感词过滤(DFA算法)