语音识别的大规模汉语树形词典 搜索速度快如闪电
Posted wicnwicnwh
tags:
篇首语:本文由小常识网(cha138.com)小编整理,主要介绍《语音识别的大规模汉语树形词典 搜索速度快如闪电》的相关知识,希望对你有一定的参考价值。
- #include "stdafx.h"
- #include "YuyinTree.h"
- #include "YuyinTreeDlg.h"
- #ifdef _DEBUG
- #define new DEBUG_NEW
- #undef THIS_FILE
- static char THIS_FILE[] = __FILE__;
- #endif
- /////////////////////////////////////////////////////////////////////////////
- // CAboutDlg dialog used for App About
- // File-scope state shared by the dialog handlers and the word tree.
- // Lookup table of the pinyin strings that already own a tree node.
- CStoredPinyin* m_storedPinyin[30000];
- CWordTree* curTree=new CWordTree; // pointer to the word tree
- // Longest stored pinyin found so far to be a sub-word of the word being
- // inserted. It starts out empty; the original initializer was a mis-encoded
- // character literal that does not compile.
- CString MaxLenghci;
- CString Tempci; // scratch word
- int StoredNum=0; // number of pinyin entries already placed in m_storedPinyin
- class CAboutDlg : public CDialog // dialog class for the application's About box
- {
- public:
- CAboutDlg(); // default constructor; defined right after the class
- // Dialog Data (region maintained by ClassWizard)
- //{{AFX_DATA(CAboutDlg)
- enum { IDD = IDD_ABOUTBOX }; // resource ID of this dialog's template
- //}}AFX_DATA
- // ClassWizard generated virtual function overrides
- //{{AFX_VIRTUAL(CAboutDlg)
- protected:
- virtual void DoDataExchange(CDataExchange* pDX); // DDX/DDV support
- //}}AFX_VIRTUAL
- // Implementation
- protected:
- //{{AFX_MSG(CAboutDlg)
- //}}AFX_MSG
- DECLARE_MESSAGE_MAP() // the matching map below registers no handlers
- };
- CAboutDlg::CAboutDlg() : CDialog(CAboutDlg::IDD) // passes the dialog's own resource ID to the CDialog base
- {
- //{{AFX_DATA_INIT(CAboutDlg)
- //}}AFX_DATA_INIT
- }
- void CAboutDlg::DoDataExchange(CDataExchange* pDX) // pDX is handed unchanged to the base class
- {
- CDialog::DoDataExchange(pDX); // the map region below is empty, so only the base-class exchange runs
- //{{AFX_DATA_MAP(CAboutDlg)
- //}}AFX_DATA_MAP
- }
- BEGIN_MESSAGE_MAP(CAboutDlg, CDialog) // message map for CAboutDlg, with CDialog named as the base class
- //{{AFX_MSG_MAP(CAboutDlg)
- // No message handlers
- //}}AFX_MSG_MAP
- END_MESSAGE_MAP()
- /////////////////////////////////////////////////////////////////////////////
- // CYuyinTreeDlg dialog
- CYuyinTreeDlg::CYuyinTreeDlg(CWnd* pParent /*=NULL*/) // pParent: parent window, forwarded to CDialog
- : CDialog(CYuyinTreeDlg::IDD, pParent)
- {
- //{{AFX_DATA_INIT(CYuyinTreeDlg)
- m_inputTongyin = _T("");
- //}}AFX_DATA_INIT
- // Note that LoadIcon does not require a subsequent DestroyIcon in Win32
- m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME); // icon later used by OnInitDialog, OnPaint and OnQueryDragIcon
- }
- void CYuyinTreeDlg::DoDataExchange(CDataExchange* pDX) // ties the IDC_PINYIN_EQUAL control to m_inputTongyin
- {
- CDialog::DoDataExchange(pDX);
- //{{AFX_DATA_MAP(CYuyinTreeDlg)
- DDX_Text(pDX, IDC_PINYIN_EQUAL, m_inputTongyin);
- //}}AFX_DATA_MAP
- }
- BEGIN_MESSAGE_MAP(CYuyinTreeDlg, CDialog) // routes three window messages, four button clicks and the tree's TVN_SELCHANGED notification to the handlers named below
- //{{AFX_MSG_MAP(CYuyinTreeDlg)
- ON_WM_SYSCOMMAND()
- ON_WM_PAINT()
- ON_WM_QUERYDRAGICON()
- ON_BN_CLICKED(IDC_BTN_READFILE, OnBtnReadfile)
- ON_BN_CLICKED(IDC_BTN_TEST_PANDCHI, OnBtnTestPandchi)
- ON_BN_CLICKED(IDC_BTN_TEST_TONGYINCI, OnBtnTestTongyinci)
- ON_BN_CLICKED(IDC_YUYIN_QUERY, OnTongyinQuery)
- ON_NOTIFY(TVN_SELCHANGED, IDC_YUYIN_TREE, OnSelchangedYuyinTree)
- //}}AFX_MSG_MAP
- END_MESSAGE_MAP()
- /////////////////////////////////////////////////////////////////////////////
- // CYuyinTreeDlg message handlers
- // Number of slots in the m_storedPinyin lookup table.
- static const int kStoredPinyinSlots = sizeof(m_storedPinyin) / sizeof(m_storedPinyin[0]);
- // Size of the text buffer allocated for every homophone list cell.
- static const int kHanziBufferSize = 50;
- // Allocates one homophone list cell that holds its own copy of hanzi.
- // The copy is truncated to kHanziBufferSize-1 bytes and always terminated.
- static CTongyinci* NewTongyinci(const char* hanzi)
- {
-     CTongyinci* cell = new CTongyinci;
-     cell->next = NULL;
-     cell->m_data = new char[kHanziBufferSize];
-     strncpy(cell->m_data, hanzi, kHanziBufferSize - 1);
-     cell->m_data[kHanziBufferSize - 1] = '\0';
-     return cell;
- }
- // Splits one lexicon record of the form "hanzi,number,pinyin" into its hanzi
- // and pinyin fields; the number field is skipped. As in the original code the
- // final character of the pinyin field is dropped (presumably a trailing
- // separator - confirm against LexiconTree.txt).
- // Returns FALSE when a comma is missing, the pinyin field is empty, or a field
- // does not fit its buffer, so a damaged record can never overrun a buffer.
- static BOOL ParseLexiconItem(const char* item, char* hanzi, size_t hanziCap, char* pinyin, size_t pinyinCap)
- {
-     const char* firstComma = strchr(item, ',');
-     if (firstComma == NULL)
-         return FALSE;
-     const char* secondComma = strchr(firstComma + 1, ',');
-     if (secondComma == NULL)
-         return FALSE;
-     const size_t hanziLen = static_cast<size_t>(firstComma - item);
-     if (hanziLen >= hanziCap)
-         return FALSE;
-     memcpy(hanzi, item, hanziLen);
-     hanzi[hanziLen] = '\0';
-     const char* tail = secondComma + 1;
-     const size_t tailLen = strlen(tail);
-     if (tailLen == 0) // the original wrote to index -1 in this case
-         return FALSE;
-     const size_t pinyinLen = tailLen - 1;
-     if (pinyinLen >= pinyinCap)
-         return FALSE;
-     memcpy(pinyin, tail, pinyinLen);
-     pinyin[pinyinLen] = '\0';
-     return TRUE;
- }
- // Adds one (hanzi, pinyin) record to curTree and to m_storedPinyin.
- //  - pinyin already stored: hanzi is appended to that node's homophone list.
- //  - a stored pinyin is a prefix followed by ',': the word becomes a child of
- //    the longest such prefix (so "a" and "an", or "la" and "a", never pair up).
- //  - otherwise: the word becomes a new top-level node after *lastTopLevel,
- //    and *lastTopLevel is advanced to it.
- // Returns FALSE, without changing anything, when a new node would be needed
- // but m_storedPinyin has no free slot left.
- static BOOL InsertWord(const char* hanzi, const char* pinyin, CTreeNode** lastTopLevel)
- {
-     const CString TempString = pinyin;
-     BOOL FParent = FALSE;            // a stored prefix (parent) was found
-     CTreeNode* parentNode = NULL;    // node of the longest stored prefix
-     MaxLenghci.Empty();
-     // Entries are appended at StoredNum, so nothing lives beyond it.
-     for (int idx = 0; idx < StoredNum && idx < kStoredPinyinSlots; idx++)
-     {
-         CStoredPinyin* entry = m_storedPinyin[idx];
-         if (entry == NULL)
-             continue;
-         // Only a match anchored at the first character is of interest.
-         if (TempString.Find(entry->m_StoredPinyin) != 0)
-             continue;
-         if (TempString == entry->m_StoredPinyin)
-         {
-             // Same pronunciation: extend the existing node's homophone list.
-             CTreeNode* owner = entry->m_pcurPosition;
-             CTongyinci* cell = NewTongyinci(hanzi);
-             if (owner->m_pchar == NULL)
-             {
-                 owner->m_pchar = cell;
-                 entry->m_pchar = cell;
-             }
-             else
-             {
-                 CTongyinci* tail = owner->m_pchar;
-                 while (tail->next != NULL)
-                     tail = tail->next;
-                 tail->next = cell;
-             }
-             owner->m_CurNum = owner->m_CurNum + 1;
-             MaxLenghci.Empty();
-             return TRUE; // no tree node is created for a homophone
-         }
-         Tempci = entry->m_StoredPinyin;
-         const int Strsize = Tempci.GetLength();
-         if (Strsize < TempString.GetLength() && TempString.GetAt(Strsize) == ',')
-         {
-             FParent = TRUE;
-             if (Strsize >= MaxLenghci.GetLength())
-             {
-                 MaxLenghci = Tempci;
-                 parentNode = entry->m_pcurPosition;
-             }
-         }
-     }
-     MaxLenghci.Empty(); // restore the shared scratch variable
-     if (StoredNum >= kStoredPinyinSlots)
-         return FALSE;
-     // The node is allocated only now, so homophones no longer leak one.
-     CTongyinci* firstCell = NewTongyinci(hanzi);
-     CTreeNode* CurNode = new CTreeNode;
-     CurNode->m_Pinyin = TempString;
-     CurNode->m_Word = hanzi;
-     CurNode->m_pParent = NULL;
-     CurNode->m_pchild = NULL;
-     CurNode->m_pneighbour = NULL;
-     CurNode->m_pchar = firstCell;
-     CurNode->m_CurNum = 1; // the word itself is its first homophone
-     if (FParent)
-         curTree->InsertNode(&CurNode, &parentNode);
-     else
-         *lastTopLevel = curTree->InsertFirstNode(&CurNode, lastTopLevel);
-     CStoredPinyin* stored = new CStoredPinyin;
-     stored->m_pcurPosition = CurNode;
-     stored->m_StoredPinyin = TempString;
-     stored->m_pchar = firstCell;
-     m_storedPinyin[StoredNum++] = stored;
-     return TRUE;
- }
- // Dialog initialization: adds the About entry to the system menu, sets the
- // icons, loads LexiconTree.txt into curTree / m_storedPinyin, reports how many
- // records were loaded and fills the tree control through BrowseYuyin().
- // Always returns TRUE (focus is not set to a control here).
- BOOL CYuyinTreeDlg::OnInitDialog()
- {
-     CDialog::OnInitDialog();
-     // Add "About..." menu item to system menu.
-     // IDM_ABOUTBOX must be in the system command range.
-     ASSERT((IDM_ABOUTBOX & 0xFFF0) == IDM_ABOUTBOX);
-     ASSERT(IDM_ABOUTBOX < 0xF000);
-     CMenu* pSysMenu = GetSystemMenu(FALSE);
-     if (pSysMenu != NULL)
-     {
-         CString strAboutMenu;
-         strAboutMenu.LoadString(IDS_ABOUTBOX);
-         if (!strAboutMenu.IsEmpty())
-         {
-             pSysMenu->AppendMenu(MF_SEPARATOR);
-             pSysMenu->AppendMenu(MF_STRING, IDM_ABOUTBOX, strAboutMenu);
-         }
-     }
-     // Set the icon for this dialog. The framework does this automatically
-     // when the application's main window is not a dialog
-     SetIcon(m_hIcon, TRUE);  // Set big icon
-     SetIcon(m_hIcon, FALSE); // Set small icon
-     // Application code: clear the table of already-read words.
-     for (int i = 0; i < kStoredPinyinSlots; i++)
-     {
-         m_storedPinyin[i] = NULL;
-     }
-     int WordNum = 0; // records actually loaded
-     // InsertFirstNode() hangs the first top-level word below the root and
-     // every later one after the node it returned last.
-     CTreeNode* lastTopLevel = curTree->m_Root;
-     FILE* InputFile = fopen("LexiconTree.txt", "r");
-     if (InputFile == NULL)
-     {
-         AfxMessageBox("无法打开词典文件 LexiconTree.txt");
-     }
-     else
-     {
-         char NodeItem[100];              // one whole record
-         char NodePinyin[51];             // pinyin field of the record
-         char NodeHanzi[kHanziBufferSize]; // hanzi field of the record
-         // The read itself is the loop condition, so end of file is never
-         // processed as a record; %99s keeps a long token inside NodeItem.
-         while (fscanf(InputFile, "%99s", NodeItem) == 1)
-         {
-             if (!ParseLexiconItem(NodeItem, NodeHanzi, sizeof(NodeHanzi), NodePinyin, sizeof(NodePinyin)))
-                 continue; // skip a malformed record
-             if (!InsertWord(NodeHanzi, NodePinyin, &lastTopLevel))
-             {
-                 AfxMessageBox("词典条目过多,其余单词未载入");
-                 break;
-             }
-             WordNum++;
-         }
-         fclose(InputFile);
-     }
-     CString message;
-     message.Format("已读取单词: %d", WordNum);
-     AfxMessageBox(message);
-     BrowseYuyin();
-     return TRUE; // return TRUE unless you set the focus to a control
- }
- // System-menu handler: opens the About box for IDM_ABOUTBOX and leaves every
- // other command to CDialog.
- void CYuyinTreeDlg::OnSysCommand(UINT nID, LPARAM lParam)
- {
-     // The low four bits of nID are masked off before the comparison.
-     const BOOL isAboutCommand = ((nID & 0xFFF0) == IDM_ABOUTBOX);
-     if (!isAboutCommand)
-     {
-         CDialog::OnSysCommand(nID, lParam);
-         return;
-     }
-     CAboutDlg aboutBox;
-     aboutBox.DoModal();
- }
- // If you add a minimize button to your dialog, you will need the code below
- // to draw the icon. For MFC applications using the document/view model,
- // this is automatically done for you by the framework.
- // Paint handler: while the dialog is minimized the application icon is drawn
- // centered in the client area; otherwise CDialog paints as usual.
- void CYuyinTreeDlg::OnPaint()
- {
-     if (!IsIconic())
-     {
-         CDialog::OnPaint();
-         return;
-     }
-     CPaintDC dc(this); // device context for painting
-     SendMessage(WM_ICONERASEBKGND, (WPARAM) dc.GetSafeHdc(), 0);
-     // Work out where a centered icon has to start.
-     const int iconWidth = GetSystemMetrics(SM_CXICON);
-     const int iconHeight = GetSystemMetrics(SM_CYICON);
-     CRect clientArea;
-     GetClientRect(&clientArea);
-     const int left = (clientArea.Width() - iconWidth + 1) / 2;
-     const int top = (clientArea.Height() - iconHeight + 1) / 2;
-     dc.DrawIcon(left, top, m_hIcon);
- }
- // The system calls this to obtain the cursor to display while the user drags
- // the minimized window.
- // Returns the dialog icon as the cursor for the WM_QUERYDRAGICON request.
- HCURSOR CYuyinTreeDlg::OnQueryDragIcon()
- {
-     HCURSOR dragCursor = (HCURSOR) m_hIcon;
-     return dragCursor;
- }
- // 添加的主要代码
- // Test helper for the first tree level: shows the pinyin of every node
- // directly below the root in a message box, one after another.
- void CYuyinTreeDlg::OnBtnReadfile()
- {
-     // Walk the root's children through their neighbour links.
-     for (CTreeNode* node = curTree->m_Root->m_pchild; node != NULL; node = node->m_pneighbour)
-     {
-         AfxMessageBox(node->m_Pinyin);
-     }
- }
- // Creates the tree with a single root node: empty pinyin and word, no parent,
- // child, neighbour or homophone list, and a homophone count of zero.
- CWordTree::CWordTree()
- {
-     CTreeNode* rootNode = new CTreeNode;
-     // text fields
-     rootNode->m_Pinyin = "";
-     rootNode->m_Word = "";
-     // links
-     rootNode->m_pParent = NULL;
-     rootNode->m_pchild = NULL;
-     rootNode->m_pneighbour = NULL;
-     // homophones
-     rootNode->m_pchar = NULL;
-     rootNode->m_CurNum = 0;
-     m_Root = rootNode;
- }
- void CWordTree::CreateTree() // empty body: calling it does nothing
- {
- }
- // Inserts *newNode as a first-level node of this tree.
- //   newNode - address of the pointer to the node to insert.
- //   parent  - address of the pointer to the attachment point: the root for
- //             the very first top-level node, otherwise the top-level node
- //             after which the new one is linked as neighbour.
- // Returns the inserted node so the caller can pass it as the next `parent`.
- // The new node's parent is always the root. This tree's own m_Root is used
- // rather than the global curTree, so the method is correct for any instance.
- CTreeNode* CWordTree::InsertFirstNode(CTreeNode** newNode,CTreeNode** parent)
- {
-     CTreeNode* WilladNode=*newNode;
-     CTreeNode* FatherNode=*parent;
-     if(FatherNode==m_Root)
-     {
-         m_Root->m_pchild=WilladNode;
-     }
-     else
-     {
-         FatherNode->m_pneighbour=WilladNode;
-     }
-     WilladNode->m_pParent=m_Root;
-     return WilladNode;
- }
- // Appends *newNode as the last child of *parent and records *parent as the
- // new node's parent.
- void CWordTree::InsertNode(CTreeNode** newNode,CTreeNode** parent)
- {
-     CTreeNode* child = *newNode;
-     CTreeNode* owner = *parent;
-     CTreeNode* lastChild = owner->m_pchild;
-     if (lastChild != NULL)
-     {
-         // Follow the neighbour chain to its end and hook the node on there.
-         while (lastChild->m_pneighbour != NULL)
-         {
-             lastChild = lastChild->m_pneighbour;
-         }
-         lastChild->m_pneighbour = child;
-     }
-     else
-     {
-         // No children yet: the new node becomes the first child.
-         owner->m_pchild = child;
-     }
-     child->m_pParent = owner;
- }
- // Test helper for two adjacent tree levels: shows the pinyin of the first
- // top-level node, then the pinyin of each of its children.
- // Does nothing when the tree has no top-level node (e.g. nothing was loaded);
- // the original dereferenced a NULL pointer in that case.
- void CYuyinTreeDlg::OnBtnTestPandchi()
- {
-     CTreeNode* firstTopLevel = curTree->m_Root->m_pchild;
-     if (firstTopLevel == NULL)
-         return;
-     AfxMessageBox(firstTopLevel->m_Pinyin);
-     for (CTreeNode* child = firstTopLevel->m_pchild; child != NULL; child = child->m_pneighbour)
-     {
-         AfxMessageBox(child->m_Pinyin);
-     }
- }
- // Test helper for homophones: shows every entry of the homophone list of the
- // first top-level node (the original note says this tests pronunciation 'a').
- // Does nothing when the tree has no top-level node; the original dereferenced
- // a NULL pointer in that case.
- void CYuyinTreeDlg::OnBtnTestTongyinci()
- {
-     CTreeNode* NodeA = curTree->m_Root->m_pchild;
-     if (NodeA == NULL)
-         return;
-     for (CTongyinci* cell = NodeA->m_pchar; cell != NULL; cell = cell->next)
-     {
-         AfxMessageBox(cell->m_data);
-     }
- }
- // Number of tree levels below the root that BrowseYuyin() displays.
- static const int kDisplayedLevels = 4;
- // Inserts `node` and all of its following neighbours under hParent, then
- // descends into their children while levelsLeft allows it (the level being
- // inserted counts as one). Items are added parent first, children next and
- // the following neighbour last.
- static void InsertPinyinLevel(CTreeCtrl* pCtrl, CTreeNode* node, HTREEITEM hParent, int levelsLeft)
- {
-     for (; node != NULL; node = node->m_pneighbour)
-     {
-         // m_Pinyin is passed directly: _T() may only wrap string literals
-         // and breaks the build under _UNICODE when applied to an expression.
-         HTREEITEM hItem = pCtrl->InsertItem(TVIF_TEXT,
-             node->m_Pinyin, 0, 0, 0, 0, 0, hParent, NULL);
-         if (levelsLeft > 1)
-         {
-             InsertPinyinLevel(pCtrl, node->m_pchild, hItem, levelsLeft - 1);
-         }
-     }
- }
- // Fills the IDC_YUYIN_TREE control with a caption item followed by the first
- // four levels of the pinyin tree.
- void CYuyinTreeDlg::BrowseYuyin()
- {
-     CTreeCtrl* pCtrl = (CTreeCtrl*) GetDlgItem(IDC_YUYIN_TREE);
-     ASSERT(pCtrl != NULL);
-     if (pCtrl == NULL) // ASSERT compiles away in release builds
-         return;
-     TVINSERTSTRUCT tvInsert;
-     tvInsert.hParent = NULL;
-     tvInsert.hInsertAfter = NULL;
-     tvInsert.item.mask = TVIF_TEXT;
-     tvInsert.item.pszText = _T("词拼音");
-     HTREEITEM hRoot = pCtrl->InsertItem(&tvInsert);
-     InsertPinyinLevel(pCtrl, curTree->m_Root->m_pchild, hRoot, kDisplayedLevels);
- }
- // Query handler: looks up the pinyin typed by the user and lists its
- // homophones in the IDC_PINYIN_RESULT combo box. The first homophone becomes
- // the box text; a "not found" text is shown when the pinyin is not stored.
- void CYuyinTreeDlg::OnTongyinQuery()
- {
-     UpdateData(TRUE); // pull the edit control's text into m_inputTongyin
-     const CString wantedPinyin = m_inputTongyin;
-     CComboBox* resultBox = (CComboBox*)GetDlgItem(IDC_PINYIN_RESULT);
-     ASSERT(resultBox != NULL);
-     resultBox->ResetContent();
-     ASSERT(resultBox->GetCount() == 0);
-     BOOL found = FALSE; // set once the pinyin has been seen in the table
-     for (int slot = 0; slot < 30000; slot++)
-     {
-         CStoredPinyin* entry = m_storedPinyin[slot];
-         if (entry == NULL || !(entry->m_StoredPinyin == wantedPinyin))
-         {
-             continue;
-         }
-         CTreeNode* node = entry->m_pcurPosition;
-         // Copy the node's whole homophone list into the combo box.
-         for (CTongyinci* cell = node->m_pchar; cell != NULL; cell = cell->next)
-         {
-             CString homophone = cell->m_data;
-             resultBox->AddString(homophone);
-         }
-         resultBox->SetWindowText(node->m_pchar->m_data);
-         found = TRUE;
-     }
-     if (!found)
-     {
-         resultBox->SetWindowText("该拼音在树中不存在!");
-     }
- }
- // TVN_SELCHANGED handler for IDC_YUYIN_TREE: lists the homophones of the
- // newly selected pinyin in the IDC_PINYIN_RESULT combo box.
- //   pNMHDR  - notification header; read as NM_TREEVIEW to get the new item.
- //   pResult - receives 0 on every path (the original left it untouched when
- //             the caption item was selected).
- // The selected text is kept in its own constant: the original reused one
- // variable as both the search key and the loop scratch value, so the key was
- // overwritten as soon as a match had been listed.
- void CYuyinTreeDlg::OnSelchangedYuyinTree(NMHDR* pNMHDR, LRESULT* pResult)
- {
-     *pResult = 0;
-     NM_TREEVIEW* pNMTreeView = (NM_TREEVIEW*)pNMHDR;
-     CTreeCtrl* pCtrl = (CTreeCtrl*) GetDlgItem(IDC_YUYIN_TREE);
-     ASSERT(pCtrl != NULL);
-     const CString selectedPinyin = pCtrl->GetItemText(pNMTreeView->itemNew.hItem);
-     if (selectedPinyin == "词拼音") // the caption item carries no pinyin
-         return;
-     CComboBox* tongyinc = (CComboBox*)GetDlgItem(IDC_PINYIN_RESULT);
-     ASSERT(tongyinc != NULL);
-     tongyinc->ResetContent();
-     ASSERT(tongyinc->GetCount() == 0);
-     BOOL FExit = FALSE; // whether the pinyin exists in the table
-     const int slotCount = sizeof(m_storedPinyin) / sizeof(m_storedPinyin[0]);
-     for (int slot = 0; slot < slotCount; slot++)
-     {
-         CStoredPinyin* entry = m_storedPinyin[slot];
-         if (entry == NULL || !(entry->m_StoredPinyin == selectedPinyin))
-             continue;
-         CTreeNode* node = entry->m_pcurPosition;
-         for (CTongyinci* cell = node->m_pchar; cell != NULL; cell = cell->next)
-         {
-             tongyinc->AddString(cell->m_data);
-         }
-         if (node->m_pchar != NULL)
-         {
-             tongyinc->SetWindowText(node->m_pchar->m_data);
-         }
-         FExit = TRUE;
-     }
-     if (!FExit)
-         tongyinc->SetWindowText("该拼音在树中不存在!");
- }
再分享一下我老师大神的人工智能教程吧。零基础!通俗易懂!风趣幽默!希望你也加入到我们人工智能的队伍中来!http://www.captainbed.net
以上是关于《语音识别的大规模汉语树形词典 搜索速度快如闪电》的主要内容,如果未能解决你的问题,请参考以下文章: