語音識別的大規模漢語樹形詞典,搜尋速度快如閃電
阿新 • • 發佈:2018-11-03
分享一下我老師大神的人工智慧教程!零基礎,通俗易懂!http://blog.csdn.net/jiangjunshow
也歡迎大家轉載本篇文章。分享知識,造福人民,實現我們中華民族偉大復興!
- #include "stdafx.h"
- #include "YuyinTree.h"
- #include "YuyinTreeDlg.h"
- #ifdef _DEBUG
- #define new DEBUG_NEW
- #undef THIS_FILE
- static char THIS_FILE[] = __FILE__;
- #endif
- /////////////////////////////////////////////////////////////////////////////
- // CAboutDlg dialog used for App About
- // File-scope state shared by the dialog handlers and CWordTree in this file.
- CStoredPinyin* m_storedPinyin[30000]; // lookup table of the pinyin strings already placed in the tree (filled by CYuyinTreeDlg::OnInitDialog)
- CWordTree* curTree=new CWordTree; // the word tree that every handler in this file works on
- CString MaxLenghci='\0'; // longest stored pinyin found so far that is a comma-delimited prefix of the word being inserted
- CString Tempci; // scratch copy of a stored pinyin string
- int StoredNum=0; // number of entries written to m_storedPinyin so far
- class CAboutDlg : public CDialog // Dialog shown by CYuyinTreeDlg::OnSysCommand for the IDM_ABOUTBOX system command.
- {
- public:
- CAboutDlg();
- // Dialog data: IDD names the dialog template handed to the CDialog constructor.
- //{{AFX_DATA(CAboutDlg)
- enum { IDD = IDD_ABOUTBOX };
- //}}AFX_DATA
- // ClassWizard generated virtual function overrides
- //{{AFX_VIRTUAL(CAboutDlg)
- protected:
- virtual void DoDataExchange(CDataExchange* pDX); // DDX/DDV support
- //}}AFX_VIRTUAL
- // Implementation
- protected:
- //{{AFX_MSG(CAboutDlg)
- //}}AFX_MSG
- DECLARE_MESSAGE_MAP()
- };
- CAboutDlg::CAboutDlg() : CDialog(CAboutDlg::IDD) // Hands the IDD_ABOUTBOX template ID to CDialog; the body initialises nothing else.
- {
- //{{AFX_DATA_INIT(CAboutDlg)
- //}}AFX_DATA_INIT
- }
- void CAboutDlg::DoDataExchange(CDataExchange* pDX) // Data-exchange hook; this dialog maps no controls of its own.
- {
- CDialog::DoDataExchange(pDX); // only the base-class exchange runs
- //{{AFX_DATA_MAP(CAboutDlg)
- //}}AFX_DATA_MAP
- }
- BEGIN_MESSAGE_MAP(CAboutDlg, CDialog) // Message map for CAboutDlg; it declares no entries of its own.
- //{{AFX_MSG_MAP(CAboutDlg)
- // No message handlers
- //}}AFX_MSG_MAP
- END_MESSAGE_MAP()
- /////////////////////////////////////////////////////////////////////////////
- // CYuyinTreeDlg dialog
- CYuyinTreeDlg::CYuyinTreeDlg(CWnd* pParent /*=NULL*/) // Builds the main dialog on the CYuyinTreeDlg::IDD template with pParent as its parent window.
- : CDialog(CYuyinTreeDlg::IDD, pParent)
- {
- //{{AFX_DATA_INIT(CYuyinTreeDlg)
- m_inputTongyin = _T(""); // text exchanged with the IDC_PINYIN_EQUAL control starts empty
- //}}AFX_DATA_INIT
- // Note that LoadIcon does not require a subsequent DestroyIcon in Win32
- m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME); // icon later passed to SetIcon and drawn by OnPaint
- }
- void CYuyinTreeDlg::DoDataExchange(CDataExchange* pDX) // Data-exchange hook: ties the IDC_PINYIN_EQUAL control to m_inputTongyin.
- {
- CDialog::DoDataExchange(pDX); // base-class exchange runs first
- //{{AFX_DATA_MAP(CYuyinTreeDlg)
- DDX_Text(pDX, IDC_PINYIN_EQUAL, m_inputTongyin);
- //}}AFX_DATA_MAP
- }
- BEGIN_MESSAGE_MAP(CYuyinTreeDlg, CDialog) // Message map for CYuyinTreeDlg: system command/paint/drag-icon messages, four button clicks and the tree-selection notification.
- //{{AFX_MSG_MAP(CYuyinTreeDlg)
- ON_WM_SYSCOMMAND()
- ON_WM_PAINT()
- ON_WM_QUERYDRAGICON()
- ON_BN_CLICKED(IDC_BTN_READFILE, OnBtnReadfile)
- ON_BN_CLICKED(IDC_BTN_TEST_PANDCHI, OnBtnTestPandchi)
- ON_BN_CLICKED(IDC_BTN_TEST_TONGYINCI, OnBtnTestTongyinci)
- ON_BN_CLICKED(IDC_YUYIN_QUERY, OnTongyinQuery)
- ON_NOTIFY(TVN_SELCHANGED, IDC_YUYIN_TREE, OnSelchangedYuyinTree)
- //}}AFX_MSG_MAP
- END_MESSAGE_MAP()
- /////////////////////////////////////////////////////////////////////////////
- // CYuyinTreeDlg message handlers
- // Initialises the dialog and loads the lexicon file into the global word tree (curTree).
- //
- // 1. Standard MFC setup: base-class init, "About..." system-menu entry, big/small icon.
- // 2. Clears the global m_storedPinyin lookup table.
- // 3. Reads "LexiconTree.txt" one whitespace-delimited token at a time. A token is parsed as
- //        <hanzi>,<word id>,<pinyin...>
- //    where the last character of the pinyin field is discarded
- //    (presumably a trailing ',' - TODO confirm against the data file).
- //    For every entry:
- //      - pinyin identical to a stored one  -> the Hanzi is appended to that node's homophone list;
- //      - a stored pinyin is a prefix of it followed by ','
- //                                          -> a child node is inserted under the longest such prefix;
- //      - otherwise                         -> a node is inserted on the first level of the tree.
- //    Every newly created node is also registered in m_storedPinyin.
- // 4. Reports the number of entries loaded and calls BrowseYuyin().
- //
- // Malformed or over-long entries are skipped. If the file cannot be opened an error box is shown
- // and the dialog opens without loading anything (BrowseYuyin() is not called in that case).
- //
- // Returns TRUE, i.e. the focus is not set to a control here.
- BOOL CYuyinTreeDlg::OnInitDialog()
- {
-     CDialog::OnInitDialog();
-     // Add "About..." menu item to system menu.
-     // IDM_ABOUTBOX must be in the system command range.
-     ASSERT((IDM_ABOUTBOX & 0xFFF0) == IDM_ABOUTBOX);
-     ASSERT(IDM_ABOUTBOX < 0xF000);
-     CMenu* pSysMenu = GetSystemMenu(FALSE);
-     if (pSysMenu != NULL)
-     {
-         CString strAboutMenu;
-         strAboutMenu.LoadString(IDS_ABOUTBOX);
-         if (!strAboutMenu.IsEmpty())
-         {
-             pSysMenu->AppendMenu(MF_SEPARATOR);
-             pSysMenu->AppendMenu(MF_STRING, IDM_ABOUTBOX, strAboutMenu);
-         }
-     }
-     // Set the icon for this dialog. The framework does this automatically
-     // when the application's main window is not a dialog
-     SetIcon(m_hIcon, TRUE); // Set big icon
-     SetIcon(m_hIcon, FALSE); // Set small icon
-     // Empty the table of already-stored pinyin strings.
-     for(int i=0;i<30000;i++)
-     {
-         m_storedPinyin[i]=NULL;
-     }
-     FILE* InputFile=fopen("LexiconTree.txt","r");
-     if(InputFile==NULL)
-     {
-         // Without this check feof()/fscanf() would be called on a NULL stream.
-         AfxMessageBox("無法開啟詞典檔案 LexiconTree.txt");
-         return TRUE;
-     }
-     TCHAR NodeItem[100]; // one raw token from the file
-     TCHAR NodePinyin[51]; // pinyin field of the token
-     TCHAR NodeHanzi[50]; // Hanzi field of the token
-     int WordNum=0; // number of entries loaded
-     BOOL IsFirst=TRUE; // TRUE until the first first-level node has been inserted
-     BOOL TableFull=FALSE; // set when m_storedPinyin has no free slot left
-     CTreeNode* TempfatherNode=NULL; // node handed to / returned by InsertFirstNode
-     CTreeNode* TempfatherNode1=NULL; // best parent candidate for the current entry
-     // Loop on the fscanf result rather than feof(): feof() only turns true after a failed read,
-     // which made the old loop process the last entry twice. "%99s" keeps the token inside NodeItem.
-     while(fscanf(InputFile,"%99s",NodeItem)==1)
-     {
-         // --- field 1: Hanzi, up to the first ',' ---
-         int pos=0;
-         while(NodeItem[pos]!=',' && NodeItem[pos]!='\0' && pos<49)
-         {
-             NodeHanzi[pos]=NodeItem[pos];
-             pos++;
-         }
-         if(NodeItem[pos]!=',')
-             continue; // no separator, or Hanzi too long for NodeHanzi
-         NodeHanzi[pos]='\0';
-         pos++;
-         // --- field 2: word id, skipped ---
-         while(NodeItem[pos]!=',' && NodeItem[pos]!='\0')
-         {
-             pos++;
-         }
-         if(NodeItem[pos]!=',')
-             continue; // second separator missing
-         pos++;
-         // --- field 3: pinyin, the rest of the token ---
-         int k=0;
-         while(NodeItem[pos]!='\0' && k<50)
-         {
-             NodePinyin[k]=NodeItem[pos];
-             k++;
-             pos++;
-         }
-         if(k==0 || NodeItem[pos]!='\0')
-             continue; // empty pinyin, or pinyin too long for NodePinyin
-         NodePinyin[k-1]='\0'; // the final character of the field is dropped, as before
-         CString TempString=NodePinyin;
-         BOOL FParent=FALSE; // a stored pinyin is a valid parent of this entry, e.g. "a" for "a,ba"
-         BOOL FEqual=FALSE; // a stored pinyin is identical to this entry (homophone)
-         // Entries are only ever written at index StoredNum, so slots at or beyond it are empty.
-         const int storedCount=(StoredNum<30000)?StoredNum:30000;
-         for(int temp=0;temp<storedCount;temp++)
-         {
-             CStoredPinyin* stored=m_storedPinyin[temp];
-             if(stored==NULL)
-                 continue;
-             const int t=TempString.Find(stored->m_StoredPinyin);
-             if(t<0)
-                 continue; // stored pinyin does not occur in this entry at all
-             if(TempString==stored->m_StoredPinyin)
-             {
-                 // Same pronunciation: append the Hanzi to the homophone list of the existing node.
-                 FEqual=TRUE;
-                 CTreeNode* owner=stored->m_pcurPosition;
-                 CTongyinci* homophone=new CTongyinci;
-                 homophone->next=NULL;
-                 homophone->m_data=new char[50];
-                 strcpy(homophone->m_data,NodeHanzi);
-                 if(owner->m_pchar==NULL)
-                 {
-                     // The old code dereferenced an uninitialised pointer in this case.
-                     owner->m_pchar=homophone;
-                 }
-                 else
-                 {
-                     CTongyinci* tail=owner->m_pchar;
-                     while(tail->next!=NULL)
-                     {
-                         tail=tail->next;
-                     }
-                     tail->next=homophone;
-                 }
-                 owner->m_CurNum++;
-                 break;
-             }
-             else if(t==0)
-             {
-                 // The stored pinyin is a proper prefix of this entry. It only counts as a parent when
-                 // the prefix ends on a syllable boundary, i.e. the next character is ',':
-                 // "a" is not a parent of "an", and "a" is not a parent of "la".
-                 Tempci=stored->m_StoredPinyin;
-                 const int Strsize=Tempci.GetLength();
-                 if(TempString.GetAt(Strsize)==',')
-                 {
-                     FParent=TRUE;
-                     if(Strsize>=MaxLenghci.GetLength())
-                     {
-                         // Keep the longest matching prefix: it is the closest ancestor.
-                         MaxLenghci=Tempci;
-                         TempfatherNode1=stored->m_pcurPosition;
-                     }
-                 }
-             }
-         }
-         if(!FEqual)
-         {
-             if(StoredNum>=30000)
-             {
-                 // No free slot left; writing on would run past the end of m_storedPinyin.
-                 TableFull=TRUE;
-                 break;
-             }
-             // The node is created only now, so a homophone no longer leaks an unused CTreeNode.
-             CTongyinci* firstWord=new CTongyinci;
-             firstWord->next=NULL;
-             firstWord->m_data=new char[50];
-             strcpy(firstWord->m_data,NodeHanzi);
-             CTreeNode* CurNode=new CTreeNode;
-             CurNode->m_Pinyin=TempString;
-             CurNode->m_Word=NodeHanzi;
-             CurNode->m_pParent=NULL;
-             CurNode->m_pchild=NULL;
-             CurNode->m_pneighbour=NULL;
-             CurNode->m_pchar=firstWord;
-             CurNode->m_CurNum=1; // the node starts with exactly one word
-             if(FParent)
-             {
-                 curTree->InsertNode(&CurNode,&TempfatherNode1); // insert below the closest ancestor
-             }
-             else
-             {
-                 if(IsFirst==TRUE)
-                 {
-                     TempfatherNode=curTree->m_Root;
-                 }
-                 // InsertFirstNode's return value is fed back in as the anchor for the next first-level insert.
-                 TempfatherNode=curTree->InsertFirstNode(&CurNode,&TempfatherNode);
-                 IsFirst=FALSE;
-             }
-             CStoredPinyin* entry=new CStoredPinyin; // register the node so later entries can find it
-             entry->m_pcurPosition=CurNode;
-             entry->m_StoredPinyin=TempString;
-             entry->m_pchar=firstWord;
-             m_storedPinyin[StoredNum]=entry;
-             StoredNum++;
-         }
-         MaxLenghci='\0'; // restore the two search variables to their initial values
-         TempfatherNode1=NULL;
-         WordNum++;
-     }
-     fclose(InputFile);
-     if(TableFull)
-     {
-         AfxMessageBox("詞典超過查詢陣列容量(30000),其餘詞條未載入");
-     }
-     char buffer[50];
-     sprintf(buffer,"已讀取單詞: %d",WordNum);
-     AfxMessageBox(buffer);
-     BrowseYuyin();
-     return TRUE; // return TRUE unless you set the focus to a control
- }
- // System-command handler: opens the About dialog for IDM_ABOUTBOX and
- // forwards every other command to CDialog.
- void CYuyinTreeDlg::OnSysCommand(UINT nID, LPARAM lParam)
- {
-     const BOOL bAboutRequested = ((nID & 0xFFF0) == IDM_ABOUTBOX);
-     if (!bAboutRequested)
-     {
-         CDialog::OnSysCommand(nID, lParam);
-         return;
-     }
-     CAboutDlg aboutBox;
-     aboutBox.DoModal();
- }
- // Paint handler. While the dialog is minimized it draws the application icon
- // centred in the client rectangle; otherwise it leaves painting to CDialog.
- // (For document/view applications the framework draws the icon itself.)
- void CYuyinTreeDlg::OnPaint()
- {
-     if (!IsIconic())
-     {
-         CDialog::OnPaint();
-         return;
-     }
-     CPaintDC paintDC(this); // device context for painting
-     SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(paintDC.GetSafeHdc()), 0);
-     // Work out the top-left corner that centres the icon in the client area.
-     const int iconWidth = GetSystemMetrics(SM_CXICON);
-     const int iconHeight = GetSystemMetrics(SM_CYICON);
-     CRect clientRect;
-     GetClientRect(&clientRect);
-     const int left = (clientRect.Width() - iconWidth + 1) / 2;
-     const int top = (clientRect.Height() - iconHeight + 1) / 2;
-     paintDC.DrawIcon(left, top, m_hIcon);
- }
- // Called by the system to get the cursor shown while the user drags the
- // minimized window; the dialog's icon handle is returned for that purpose.
- HCURSOR CYuyinTreeDlg::OnQueryDragIcon()
- {
-     HCURSOR hDragCursor = static_cast<HCURSOR>(m_hIcon);
-     return hDragCursor;
- }
- // Main code added by the author
- // Test helper for the first level of the tree: walks the root's child and its
- // chain of neighbours, showing each node's pinyin in a message box.
- void CYuyinTreeDlg::OnBtnReadfile()
- {
-     for (CTreeNode* node = curTree->m_Root->m_pchild; node != NULL; node = node->m_pneighbour)
-     {
-         AfxMessageBox(node->m_Pinyin);
-     }
- }
- // Creates the tree with a single root node: empty pinyin and word,
- // no parent, child, neighbour or word list, and a word count of zero.
- CWordTree::CWordTree()
- {
-     CTreeNode* rootNode = new CTreeNode;
-     // links
-     rootNode->m_pParent = NULL;
-     rootNode->m_pchild = NULL;
-     rootNode->m_pneighbour = NULL;
-     // payload
-     rootNode->m_Pinyin = "";
-     rootNode->m_Word = "";
-     rootNode->m_pchar = NULL;
-     rootNode->m_CurNum = 0;
-     m_Root = rootNode;
- }
- void CWordTree::CreateTree() // Currently does nothing; in this file the tree is filled by CYuyinTreeDlg::OnInitDialog via InsertNode/InsertFirstNode.
- {
- }
- CTreeNode* CWordTree::InsertFirstNode(CTreeNode** newNode,CTreeNode** parent) //插入第一層節點
- {
- CTreeNode* WilladNode=*newNode;
- CTreeNode* FatherNode=*parent;
- if(FatherNode==curTree->m_Root)
- {
- FatherNode->m_pchild=WilladNode;
- WilladNode->m_pParent=curTree->m_Root;
- }
- else
- {
- FatherNode->m_pneighbour=WilladNode;
- WilladNode->