城市查询-拼音全拼简拼混拼卷舌音前后鼻音兼容查询C#与nodejs+redis应用---笔记

Posted 蜗牛水里爬

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了城市查询-拼音全拼简拼混拼卷舌音前后鼻音兼容查询C#与nodejs+redis应用---笔记相关的知识,希望对你有一定的参考价值。

2018-10-18日整理更新

原文地址:http://blog.csdn.net/gzy11/article/details/52351697

实现主要功能:

1、全拼搜索。如:shanghai 搜索出上海
2、简拼搜索。如:sh 上海
3、模糊拼音搜索。实现类似搜狗拼音输入法。如:石嘴山 SZhSh;ShZSh;ShZhS;SZuiShan;ShiZShan;ShiZuiS;SZhuiShan;ShiZhuiS;SZhuiShang;ShiZShang;SZuiShang;SZuiSan;SiZSan;SiZuiS;ShZS;SZhS;SZSh;ShZuiShan;ShiZhShan;ShiZuiSh;ShZhuiShan;ShiZhuiSh;ShZhuiShang;ShiZhShang;ShZuiShang;ShZuiSan;SiZhSan;SiZuiSh;ShiZS;SZuiS;SZShan;ShiZhSh;ShZuiSh;ShZhShan;ShiZhuiShan;ShiZuiShan;ShiZhuiShang;ShiZuiShang;ShiZuiSan;SiZuiSan;SiZuiShan;SZhuiS;ShZhuiSh;SiZhuiSan;SZShang;ShZhShang;SiZuiShang;SiZS;SZSan;SiZhSh;ShZhSan;SiZhuiShan;ShiZhuiSan;SiZhuiShang

第一步:用C#简单实现拼音的全拼简拼卷舌音,前后鼻音兼容等功能。并建立redis所需查询索引等。

只是简单实现了该功能,未做性能优化。前提条件:城市拼音需事先手动建立好。如:北京,全拼 BeiJing(区分大小写,每个音节首字母大写,代码按大写字母切分音节),简拼 BJ。简拼可有可无,没有的话,自己从全拼中截取出来即可。

关于全拼的获取,一种是源数据中已包含拼音,另一种是通过汉字获取拼音。后一种方法借助微软的 Microsoft Visual Studio International Pack 实现。该组件有两个版本,我们用到的是 1.0 版(2.0 版是 1.0 版的增强版),需要引用 ChnCharInfo.dll。

Microsoft Visual Studio International Feature Pack 2.0 

Microsoft Visual Studio International Pack 1.0 

代码如下:

版本:1.0.0.14

using Microsoft.International.Converters.PinYinConverter;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace XXXXXX.Toolkits.Common
{
    /// <summary>
    /// Builds and matches fuzzy pinyin search keys for place names: full pinyin,
    /// short (initial-letter) pinyin, and variants that tolerate confusion between
    /// flat/retroflex initials (Z/Zh, C/Ch, S/Sh) and front/back nasal finals
    /// (an/ang, en/eng, in/ing).
    /// </summary>
    public class SpellIndexHelper
    {
        /// <summary>
        /// Initials: { flat form, retroflex form }.
        /// </summary>
        private static readonly string[,] initialsList = new string[3, 2] { { "Z", "Zh" }, { "C", "Ch" }, { "S", "Sh" } };

        /// <summary>
        /// Finals: { front-nasal form, back-nasal form }. The capitalised rows cover
        /// syllables that start with the final itself (e.g. "An", "En").
        /// </summary>
        private static readonly string[,] finalsList = new string[5, 2] { { "an", "ang" }, { "en", "eng" }, { "in", "ing" }, { "An", "Ang" }, { "En", "Eng" } };

        /// <summary>
        /// Initials followed by finals, in the order they are applied by <see cref="Append(string, bool)"/> and <see cref="Remove"/>.
        /// </summary>
        private static readonly string[,] spellList = new string[8, 2] { { "Z", "Zh" }, { "C", "Ch" }, { "S", "Sh" }, { "an", "ang" }, { "en", "eng" }, { "in", "ing" }, { "An", "Ang" }, { "En", "Eng" } };


        #region Chinese to pinyin

        /// <summary>
        /// Converts the Chinese characters in <paramref name="chinese"/> to pinyin.
        /// Characters outside the range U+4E00..U+9FBB (digits, Latin letters, punctuation) are skipped.
        /// For characters with several readings only the first reading reported by the converter is used.
        /// </summary>
        /// <param name="chinese">Text containing Chinese characters; null or empty yields an empty string.</param>
        /// <param name="isShortSpell">
        /// When true, only the upper-cased first letter of every syllable is returned (e.g. "BJ");
        /// otherwise every syllable is returned with a capital first letter (e.g. "BeiJing").
        /// </param>
        /// <returns>The full or short pinyin, or an empty string when there is nothing to convert.</returns>
        public static string GetSpell(string chinese, bool isShortSpell = false)
        {
            if (string.IsNullOrEmpty(chinese))
            {
                return string.Empty;
            }

            StringBuilder result = new StringBuilder();
            foreach (char spellChar in chinese)
            {
                if (spellChar >= 0x4e00 && spellChar <= 0x9fbb) // CJK unified ideograph range handled by this helper
                {
                    ChineseChar chineseChar = new ChineseChar(spellChar);
                    // The flag is forwarded so that the caller's choice of short/full pinyin takes effect.
                    result.Append(Capitalize(chineseChar.Pinyins[0].ToString(), isShortSpell));
                }
            }
            return result.ToString();
        }

        /// <summary>
        /// Normalises a single pinyin reading to "first letter upper-case, rest lower-case".
        /// A trailing tone digit, if present, is removed (presumably the converter returns
        /// readings such as "BEI3" - TODO confirm against the library).
        /// </summary>
        /// <param name="spell">One pinyin reading.</param>
        /// <param name="isShortSpell">When true only the upper-cased first letter is returned.</param>
        /// <returns>The normalised syllable, or an empty string for null/empty input.</returns>
        private static string Capitalize(string spell, bool isShortSpell = false)
        {
            if (string.IsNullOrEmpty(spell))
            {
                return string.Empty;
            }

            string initial = spell.Substring(0, 1).ToUpperInvariant();
            if (isShortSpell)
            {
                return initial;
            }

            // Only strip the last character when it really is a tone digit, and never
            // ask Substring for a negative length.
            int end = spell.Length;
            if (char.IsDigit(spell[end - 1]))
            {
                end--;
            }
            if (end <= 1)
            {
                return initial;
            }
            return initial + spell.Substring(1, end - 1).ToLowerInvariant();
        }

        #endregion

        #region Matching

        /// <summary>
        /// Tests whether <paramref name="value"/> starts with <paramref name="query"/>, ignoring case.
        /// </summary>
        /// <param name="value">The stored name.</param>
        /// <param name="query">The text typed by the user.</param>
        /// <returns>True on a case-insensitive prefix match; false when either argument is blank.</returns>
        public static bool IsChineseMatch(string value, string query)
        {
            if (string.IsNullOrWhiteSpace(query) || string.IsNullOrWhiteSpace(value))
            {
                return false;
            }
            return value.StartsWith(query, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Tests whether the pinyin <paramref name="value"/> starts with <paramref name="query"/>,
        /// first literally and then after both sides have been normalised with
        /// <see cref="Append(string, bool)"/> so that Z/Zh, C/Ch, S/Sh and an/ang, en/eng, in/ing are treated alike.
        /// </summary>
        /// <param name="value">The stored pinyin.</param>
        /// <param name="query">The pinyin typed by the user.</param>
        /// <returns>True when either comparison matches; false when either argument is blank.</returns>
        public static bool IsSpellMatch(string value, string query)
        {
            if (string.IsNullOrWhiteSpace(query) || string.IsNullOrWhiteSpace(value))
            {
                return false;
            }
            if (IsSpellContains(value, query))
            {
                return true;
            }
            return IsSpellAppendContains(value, query);
        }

        /// <summary>
        /// Literal, case-insensitive prefix comparison (despite the name this is a prefix test, not a substring test).
        /// </summary>
        /// <param name="value">The stored pinyin.</param>
        /// <param name="query">The pinyin typed by the user.</param>
        /// <returns>True when <paramref name="value"/> starts with <paramref name="query"/>.</returns>
        private static bool IsSpellContains(string value, string query)
        {
            return value.StartsWith(query, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Prefix comparison after both sides have been expanded to their retroflex / back-nasal form.
        /// </summary>
        /// <param name="value">The stored pinyin.</param>
        /// <param name="query">The pinyin typed by the user.</param>
        /// <returns>True when the normalised value starts with the normalised query.</returns>
        private static bool IsSpellAppendContains(string value, string query)
        {
            string queryAppend = Append(query, true);
            string valueAppend = Append(value, true);
            return IsSpellContains(valueAppend, queryAppend);
        }
        #endregion

        /// <summary>
        /// Expands every fuzzy initial and final to its longer form, keeping case (BeiJin -> BeiJing).
        /// </summary>
        /// <param name="spell">Pinyin with capitalised syllables.</param>
        /// <returns>The expanded pinyin; null or empty input is returned unchanged.</returns>
        public static string Append(string spell)
        {
            return Append(spell, false);
        }

        /// <summary>
        /// Expands every fuzzy initial and final to its longer form, optionally lower-casing first (BeiJin -> beijing).
        /// </summary>
        /// <param name="spell">Pinyin to expand.</param>
        /// <param name="isLower">When true the input and the replacement rules are lower-cased before use.</param>
        /// <returns>The expanded pinyin; null or empty input is returned unchanged.</returns>
        public static string Append(string spell, bool isLower)
        {
            if (string.IsNullOrEmpty(spell))
            {
                return spell;
            }
            if (isLower)
            {
                spell = spell.ToLowerInvariant();
            }

            // Once lower-cased, the "An"/"En" rows repeat the "an"/"en" rows. Applying a rule twice
            // would turn "ang" into "anggg", which the single "gg" clean-up below cannot fully undo.
            HashSet<string> applied = new HashSet<string>(StringComparer.Ordinal);
            for (int i = 0; i < spellList.GetLength(0); i++)
            {
                string shortForm = isLower ? spellList[i, 0].ToLowerInvariant() : spellList[i, 0];
                string longForm = isLower ? spellList[i, 1].ToLowerInvariant() : spellList[i, 1];
                if (applied.Add(shortForm))
                {
                    spell = spell.Replace(shortForm, longForm);
                }
            }

            // Input that already used the long form now reads "Zhh" / "angg"; collapse the doubled letter.
            spell = spell.Replace("hh", "h");
            spell = spell.Replace("gg", "g");
            return spell;
        }

        /// <summary>
        /// Expands flat initials to retroflex ones (Z -> Zh, C -> Ch, S -> Sh).
        /// </summary>
        /// <param name="spell">Pinyin with capitalised syllables.</param>
        /// <returns>The expanded pinyin; null or empty input is returned unchanged.</returns>
        public static string AppendInitials(string spell)
        {
            if (string.IsNullOrEmpty(spell))
            {
                return spell;
            }
            return ReplacePairs(spell, initialsList, 0, 1).Replace("hh", "h");
        }

        /// <summary>
        /// Expands front-nasal finals to back-nasal ones (an -> ang, en -> eng, in -> ing).
        /// </summary>
        /// <param name="spell">Pinyin with capitalised syllables.</param>
        /// <returns>The expanded pinyin; null or empty input is returned unchanged.</returns>
        public static string AppendFinals(string spell)
        {
            if (string.IsNullOrEmpty(spell))
            {
                return spell;
            }
            return ReplacePairs(spell, finalsList, 0, 1).Replace("gg", "g");
        }

        /// <summary>
        /// Reduces every fuzzy initial and final to its shorter form (BeiJing -> BeiJin).
        /// </summary>
        /// <param name="spell">Pinyin with capitalised syllables.</param>
        /// <returns>The reduced pinyin; null or empty input is returned unchanged.</returns>
        public static string Remove(string spell)
        {
            return ReplacePairs(spell, spellList, 1, 0);
        }

        /// <summary>
        /// Reduces retroflex initials to flat ones (Zh -> Z, Ch -> C, Sh -> S).
        /// </summary>
        /// <param name="spell">Pinyin with capitalised syllables.</param>
        /// <returns>The reduced pinyin; null or empty input is returned unchanged.</returns>
        public static string RemoveInitials(string spell)
        {
            return ReplacePairs(spell, initialsList, 1, 0);
        }

        /// <summary>
        /// Reduces back-nasal finals to front-nasal ones (ang -> an, eng -> en, ing -> in).
        /// </summary>
        /// <param name="spell">Pinyin with capitalised syllables.</param>
        /// <returns>The reduced pinyin; null or empty input is returned unchanged.</returns>
        public static string RemoveFinals(string spell)
        {
            return ReplacePairs(spell, finalsList, 1, 0);
        }

        /// <summary>
        /// Applies every row of a replacement table in order, replacing the text in
        /// column <paramref name="from"/> with the text in column <paramref name="to"/>.
        /// </summary>
        private static string ReplacePairs(string spell, string[,] pairs, int from, int to)
        {
            if (string.IsNullOrEmpty(spell))
            {
                return spell;
            }
            for (int i = 0; i < pairs.GetLength(0); i++)
            {
                spell = spell.Replace(pairs[i, from], pairs[i, to]);
            }
            return spell;
        }


        /// <summary>
        /// Splits pinyin into syllables at every upper-case letter (BeiJing -> Bei, Jing).
        /// Characters that are neither upper- nor lower-case letters are dropped.
        /// </summary>
        /// <param name="spell">Pinyin with capitalised syllables.</param>
        /// <returns>The syllables in order, or null when <paramref name="spell"/> is null or blank.</returns>
        public static List<string> SplitSpell(string spell)
        {
            if (string.IsNullOrWhiteSpace(spell))
            {
                return null;
            }

            List<string> syllables = new List<string>();
            StringBuilder current = new StringBuilder();
            foreach (char c in spell)
            {
                if (char.IsUpper(c))
                {
                    // An upper-case letter closes the previous syllable and opens a new one.
                    if (current.Length > 0)
                    {
                        syllables.Add(current.ToString());
                        current.Clear();
                    }
                    current.Append(c);
                }
                else if (char.IsLower(c))
                {
                    current.Append(c);
                }
            }

            // Flush after the loop so the last syllable survives even when the input ends with a non-letter.
            if (current.Length > 0)
            {
                syllables.Add(current.ToString());
            }
            return syllables;
        }

        /// <summary>
        /// Builds the ';'-separated list of every mixed spelling of a name: for each syllable
        /// position any of its variants (initial only, retroflex initial, full syllable,
        /// expanded or reduced initials/finals) may be chosen independently.
        /// </summary>
        /// <param name="shortSpell">
        /// Short pinyin such as "BJ". When blank it is derived from the first letter of every syllable of <paramref name="spell"/>.
        /// </param>
        /// <param name="spell">Full pinyin with capitalised syllables such as "BeiJing".</param>
        /// <returns>All distinct combinations joined with ';' (empty when no syllable could be extracted).</returns>
        /// <exception cref="ArgumentException"><paramref name="spell"/> is null or blank.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="spell"/> splits into fewer syllables than <paramref name="shortSpell"/>.
        /// </exception>
        /// <remarks>
        /// The number of syllable positions is taken from <paramref name="shortSpell"/>; extra syllables in
        /// <paramref name="spell"/> are ignored.
        /// </remarks>
        public static string CreateHybridIndex(string shortSpell, string spell)
        {
            if (string.IsNullOrWhiteSpace(spell))
            {
                throw new ArgumentException("The full pinyin must not be null or blank.", "spell");
            }
            if (string.IsNullOrWhiteSpace(shortSpell))
            {
                StringBuilder initials = new StringBuilder();
                foreach (string syllable in SplitSpell(spell))
                {
                    initials.Append(syllable[0]);
                }
                shortSpell = initials.ToString();
            }

            // Outer list: one entry per spelling variant; inner list: that variant's syllables.
            List<List<string>> list = new List<List<string>>();
            list.Add(SplitSpell(shortSpell));                   // short pinyin as given
            list.Add(SplitSpell(AppendInitials(shortSpell)));   // short pinyin, retroflex initials
            list.Add(SplitSpell(spell));                        // full pinyin as given
            list.Add(SplitSpell(AppendInitials(spell)));        // full pinyin, retroflex initials
            list.Add(SplitSpell(Append(spell)));                // full pinyin, everything expanded
            list.Add(SplitSpell(AppendFinals(spell)));          // full pinyin, back-nasal finals
            list.Add(SplitSpell(RemoveInitials(spell)));        // full pinyin, flat initials
            list.Add(SplitSpell(RemoveFinals(spell)));          // full pinyin, front-nasal finals
            list.Add(SplitSpell(Remove(spell)));                // full pinyin, everything reduced
            list = Reverse(list); // now: one entry per syllable position, holding all its variants

            // Fold left over the positions. Starting from the distinct variants of the first
            // position also covers names with a single syllable.
            List<string> resultList = list.Count == 0 ? new List<string>() : list[0].Distinct().ToList();
            for (int i = 1; i < list.Count; i++)
            {
                resultList = GetCombination(resultList, list[i]);
            }
            return GetCombinationToString(resultList);
        }

        /// <summary>
        /// Transposes "variants x syllables" into "syllables x variants". The number of
        /// syllable positions is taken from the first variant.
        /// </summary>
        /// <param name="list">One syllable list per spelling variant.</param>
        /// <returns>One variant list per syllable position; empty when the first variant has no syllables.</returns>
        /// <exception cref="ArgumentOutOfRangeException">A variant has fewer syllables than the first one.</exception>
        private static List<List<string>> Reverse(List<List<string>> list)
        {
            List<List<string>> resultList = new List<List<string>>();
            if (list == null || list.Count == 0 || list[0] == null)
            {
                return resultList;
            }

            int length = list[0].Count;
            for (int i = 0; i < length; i++)
            {
                List<string> li = new List<string>(list.Count);
                foreach (List<string> item in list)
                {
                    if (item == null || item.Count <= i)
                    {
                        throw new ArgumentOutOfRangeException("list", "Every pinyin variant must contain at least as many syllables as the short spelling.");
                    }
                    li.Add(item[i]);
                }
                resultList.Add(li);
            }
            return resultList;
        }

        /// <summary>
        /// Concatenates every element of <paramref name="first"/> with every element of <paramref name="last"/>.
        /// </summary>
        /// <param name="first">Prefixes.</param>
        /// <param name="last">Suffixes.</param>
        /// <returns>The distinct concatenations in first-seen order.</returns>
        private static List<string> GetCombination(List<string> first, List<string> last)
        {
            List<string> result = new List<string>(first.Count * last.Count);
            foreach (string head in first)
            {
                foreach (string tail in last)
                {
                    result.Add(head + tail);
                }
            }
            return result.Distinct().ToList();
        }

        /// <summary>
        /// Joins the items with ';'.
        /// </summary>
        /// <param name="list">Items to join; may be null or empty.</param>
        /// <returns>The joined text, or an empty string when there are no items.</returns>
        private static string GetCombinationToString(List<string> list)
        {
            return list == null ? string.Empty : string.Join(";", list);
        }

        /// <summary>
        /// Removes duplicate entries from a ';'-separated list, keeping first-seen order.
        /// </summary>
        /// <param name="hybridSpell">';'-separated spellings; may be null or empty.</param>
        /// <returns>The de-duplicated list joined with ';', or an empty string when there are no entries.</returns>
        private static string Distinct(string hybridSpell)
        {
            if (string.IsNullOrEmpty(hybridSpell))
            {
                return string.Empty;
            }
            return string.Join(";", hybridSpell.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries).Distinct());
        }
    }
}

 

C#调用

    [TestMethod]
        public void GetSpell()
        {
            //string str = "2017-12-12 12:12:12";
            string result = SpellIndexHelper.GetSpell("456北京"); //获取汉字拼音
        }
        [TestMethod]
        public void CreateHybridIndex()
        {
            string shortSpell="BJ";
            string spell = SpellIndexHelper.GetSpell("456北京");
            SpellIndexHelper.CreateHybridIndex(shortSpell,spell);//建立索引
        }
         [TestMethod]
        public void IsSpellMatch()
        {
            //string str = "2017-12-12 12:12:12";
            SpellIndexHelper.IsSpellMatch("jinan", "jin");//C#中简单 卷舌音 前后鼻音 查询
        }          

效果如下:

生成混拼索引后存入SQL数据库中,效果如下:

第二步:用nodejs实现查询功能。

nodejs的异步真的很操蛋啊,真是个陷阱。从redis取数据的时候真麻烦。

nodejs 代码如下:

var dbHelper = require('../../WebApi/MSSQL/dbHelper.js');
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;

//var idd=1;
/*dbHelper.select("dbo.[InsuranceRegion]",1,"where id=@id",{id: idd},"order by id",function(err,result){
 console.log(result);
 });*/
//dbHelper.select("dbo.InsuranceRegion","","","","order by RegionId",function(err,result){
//console.log(result);
//});
var redis = require("redis");
var express = require("express");
// HTTP application that serves the city lookup endpoints.
var app = express();
var isSuccess = false;
// Shared Redis connection used both for building the indexes and for answering queries.
var client = redis.createClient('6379', '192.168.151.87');

// Redis connection errors are only logged.
function logRedisError(error) {
    console.log(error);
}
client.on("error", logRedisError);
// Maps null/undefined to an empty string; every other value is returned unchanged.
var convertNullToEmptyStr = function (data) {
    if (data == null) {
        return "";
    }
    return data;
};
// Presence flag: 0 when the value is null/undefined, 1 for anything else
// (note that 0, false and "" therefore also map to 1).
var convertToBool = function (data) {
    var isMissing = (data === null || data === undefined);
    if (isMissing) {
        return 0;
    }
    return 1;
};
/*
var json = "{'name':'aa','age':18}";
var jsonobj = eval('('+json+")");
for (var key in jsonobj) {
    console.log(key)
}*/
// Loads every region row from SQL Server into Redis:
//   - one hash per region            baoxian:Region:RegionId:<id>
//   - province set (Level 1)         baoxian:Region:Level:1
//   - city set (Level 2) plus the prefix search indexes for name, full name and mixed pinyin
//   - county sets (Level 3)          baoxian:Region:County:CityID:<first four digits of the id>00
var sql ="SELECT  [InsuranceRegion].[RegionId]  ,[CityID]    ,[Name]    ,[Spell]    ,[FullName]    ,[Level]    ,[OrderNumber] ,[ShorSpell],[HotFlag] ,[HotOrderNumber],[LicensePlateCode] ,[SpellAdapter] ,[SpellIndex],[YGBX]    ,[CCIC]    ,[AXATP]    ,[PICC]    ,[CPIC]    ,[PAIC]    ,[ZABX] FROM [Finance].[dbo].[InsuranceRegion] left  join [InsuranceRegionMapping] on [InsuranceRegion].RegionId =[InsuranceRegionMapping].GBCode";
dbHelper.querySql(sql,"",function(err,result){
    if (err) {
        // Without rows there is nothing to index; report the failure instead of ignoring it.
        console.log(err);
        return;
    }
    for (var i in result){
        var row = result[i];
        if (!row || row.RegionId == null) {
            continue; // not a data row
        }
        client.hmset(
            "baoxian:Region:RegionId:"+row.RegionId,
            "RegionId",row.RegionId,
            "CityID", convertNullToEmptyStr(row.CityID),
            "Name", convertNullToEmptyStr(row.Name),
            "Spell",convertNullToEmptyStr(row.Spell),
            "FullName", convertNullToEmptyStr(row.FullName),
            "Level",  convertNullToEmptyStr(row.Level),
            "OrderNumber",  convertNullToEmptyStr(row.OrderNumber),
            "ShorSpell",  convertNullToEmptyStr(row.ShorSpell),
            "HotFlag", convertNullToEmptyStr(row.HotFlag),
            "HotOrderNumber",  convertNullToEmptyStr(row.HotOrderNumber),
            "LicensePlateCode",  convertNullToEmptyStr(row.LicensePlateCode),
            "SpellIndex",  convertNullToEmptyStr(row.SpellIndex),
            "SpellAdapter",  convertNullToEmptyStr(row.SpellAdapter),
            "YGBX",convertToBool(row.YGBX),
            "CCIC",convertToBool(row.CCIC),
            "AXATP",convertToBool(row.AXATP),
            "PICC",convertToBool(row.PICC),
            "CPIC",convertToBool(row.CPIC),
            // Each flag is read from its own column (PAIC and ZABX used to be copied from CPIC).
            "PAIC",convertToBool(row.PAIC),
            "ZABX",convertToBool(row.ZABX)
        );
        if(row.Level==2){ // city: membership set plus search indexes
            client.sadd(["baoxian:Region:Level:2", row.RegionId], function(err, reply) {
                if (err) { console.log(err); }
            });
            // Columns may be NULL in the database; the index builders need a string.
            if (row.Name != null) {
                createQueryIndex(row.Name,row.RegionId);       // Chinese name prefixes
            }
            if (row.FullName != null) {
                createQueryIndex(row.FullName,row.RegionId);   // Chinese full-name prefixes
            }
            if (row.SpellIndex != null) {
                createQuerySpell(row.SpellIndex,row.RegionId); // mixed pinyin prefixes
            }
        }
        else if(row.Level==1){ // province
            client.sadd("baoxian:Region:Level:1",row.RegionId);
        }
        else if(row.Level==3){ // county/district, grouped under its parent city
            client.sadd("baoxian:Region:County:CityID:"+row.RegionId.toString().substring(0,4)+"00",row.RegionId);
        }
    }
});

// Indexes every spelling of a ';'-separated mixed-pinyin list for the given region.
//   data     - string such as "BJ;BJing;BeiJ;BeiJing"; null, undefined or non-string values are ignored
//   regionId - id stored in each index set
var createQuerySpell=function(data,regionId){
    console.log(data);
    if (typeof data !== "string") {
        return; // nothing to index (e.g. a NULL SpellIndex column)
    }
    var spellings = data.split(";");
    for (var i = 0; i < spellings.length; i++) {
        if (spellings[i] != "") { // skip empty entries produced by stray separators
            createQueryIndex(spellings[i],regionId);
        }
    }
};
//建立查询索引
// Builds the prefix search index for one term: the region id is added to the set
// "baoxian:Region:Query:<prefix>" for every non-empty prefix of the lower-cased term.
//   data     - term to index (name or pinyin); null/undefined is ignored
//   regionId - id stored in each prefix set
var createQueryIndex=function(data,regionId){
    if (data == null) {
        return;
    }
    var term = String(data).toLowerCase(); // lower-case once instead of once per prefix
    for(var i=1;i<=term.length;i++){
        client.sadd(["baoxian:Region:Query:"+ term.substring(0,i),regionId], function(err, reply) {
            if (err) { console.log(err); }
        });
    }
};
//建立移动站热点城市查询
// Registers the fixed list of hot cities for the mobile site in the set "baoxian:Region:MHotCity".
var createMHotCityIndex = function () {
    var mobileHotCityIds = [
        110100, 310100, 440100, 440300, 320100, 320500, 330100, 370100,
        370200, 420100, 430100, 210100, 510100, 500100, 610100, 340100,
        350100, 220100, 130100, 410100, 120100, 210200
    ];
    mobileHotCityIds.forEach(function (regionId) {
        client.sadd(["baoxian:Region:MHotCity", regionId], function (err, reply) {
            // reply is intentionally ignored
        });
    });
};
createMHotCityIndex();
// Registers every city flagged as hot in the database in the set "baoxian:Region:HotCity".
// NOTE: a Redis set is unordered, so the SQL ordering by HotOrderNumber is not kept.
dbHelper.select("dbo.InsuranceRegion","","where Level=2 and HotFlag=1 ","","order by [HotOrderNumber]",function(err,result){
    if (err) {
        // Report the failure instead of silently leaving the hot-city set empty.
        console.log(err);
        return;
    }
    for (var i in result){
        client.sadd(["baoxian:Region:HotCity",result[i].RegionId], function(err, reply) {
            if (err) { console.log(err); }
        });
    }
});
// Accepts plain (optionally dotted) identifiers only: the callback name is echoed
// into the response body, so anything else must not be reflected.
function isValidJsonpCallback(name) {
    return typeof name === "string" && /^[A-Za-z_$][\w$]*(\.[A-Za-z_$][\w$]*)*$/.test(name);
}

// Sends `data` as JSONP ("callback({...})"); when no valid callback name was supplied
// the bare JSON text is sent instead.
function sendJsonp(res, callbackName, data) {
    var body = JSON.stringify(data);
    if (!res.headersSent) {
        res.writeHead(200, {
            "Content-Type": "text/plain;charset=utf-8"
        });
    }
    res.end(isValidJsonpCallback(callbackName) ? callbackName + "(" + body + ")" : body);
}

// Sends a failure envelope carrying the error text.
function sendFailure(res, callbackName, data, error) {
    console.log("[error]:" + error);
    data.Result = false;
    data.Message = String(error);
    data.Data = null;
    sendJsonp(res, callbackName, data);
}

// Loads the region hash of every id and answers exactly once, after the last lookup
// has completed. Results keep the order of `regionIds`; ids without a hash yield null.
function sendRegions(res, callbackName, regionIds, data) {
    var total = regionIds.length;
    if (total === 0) {
        data.Message = "没有匹配项!";
        return sendJsonp(res, callbackName, data);
    }
    var regions = new Array(total);
    var pending = total;
    var failed = false;
    regionIds.forEach(function (regionId, index) {
        client.hgetall("baoxian:Region:RegionId:" + regionId, function (err, region) {
            if (failed) {
                return; // the failure response has already been sent
            }
            if (err) {
                failed = true;
                return sendFailure(res, callbackName, data, err);
            }
            regions[index] = region;
            pending--;
            // Counting completions works even when individual lookups return null.
            if (pending === 0) {
                data.Data = regions;
                sendJsonp(res, callbackName, data);
            }
        });
    });
}

// Answers with the region hashes of all members of the Redis set `setKey`.
function sendRegionSet(res, callbackName, setKey) {
    var data = {
        Result: true,
        Message: null,
        Data: null
    };
    client.smembers(setKey, function (err, regionIds) {
        if (err) {
            return sendFailure(res, callbackName, data, err);
        }
        sendRegions(res, callbackName, regionIds || [], data);
    });
}

if (cluster.isMaster) {
    // Fork one worker per CPU core.
    for (var i = 1; i <= numCPUs; i++) {
        cluster.fork();
    }

    cluster.on('exit', function(worker, code, signal) {
        console.log('worker ' + worker.process.pid + ' died');
    });
    cluster.on('fork', function(worker) {
        console.log('worker ' + worker.process.pid + ' is online');
    });
} else {
    // GET /CityQuery?key=<prefix>&jsoncallback=<name>
    // Looks up all regions whose name or pinyin starts with the (lower-cased) key.
    app.get('/CityQuery', function(req, res) {
        var jsonp = req.query.jsoncallback;
        var key = req.query.key;
        console.log(key);
        // Validate before decoding: decodeURIComponent(undefined) yields the string "undefined".
        if (typeof key !== "string" || key === "") {
            return sendJsonp(res, jsonp, {
                Result: false,
                Message: "查询参数为空!",
                Data: null
            });
        }
        try {
            key = decodeURIComponent(key);
        }
        catch (error) {
            // The query string has already been decoded once by the framework; a literal '%'
            // is therefore legitimate and the value is used as it is.
        }
        sendRegionSet(res, jsonp, 'baoxian:Region:Query:' + key.toLowerCase());
    });
    // GET /HotCity?jsoncallback=<name> - hot cities flagged in the database.
    app.get('/HotCity', function(req, res) {
        var jsonp = req.query.jsoncallback;
        console.log(jsonp);
        sendRegionSet(res, jsonp, 'baoxian:Region:HotCity');
    });
    // GET /HotMCity?jsoncallback=<name> - hot cities of the mobile site.
    app.get('/HotMCity', function(req, res) {
        sendRegionSet(res, req.query.jsoncallback, 'baoxian:Region:MHotCity');
    });
    app.listen(9800);
}

nodejs中MSSQL查询,参考:http://blog.csdn.net/gzy11/article/details/52117793

 

nodejs查询效果如下:

 

以上是关于城市查询-拼音全拼简拼混拼卷舌音前后鼻音兼容查询C#与nodejs+redis应用---笔记的主要内容,如果未能解决你的问题,请参考以下文章

qq拼音输入法下载|qq拼音输入法纯净版下载

qq拼音输入法下载|qq拼音输入法纯净版下载

总结自己做过的事情

汉字转拼音

Java获取汉字的拼音

全国省市区数据库(带拼音简称行政编码邮政编码等)