程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
 程式師世界 >> 編程語言 >> 網頁編程 >> PHP編程 >> 關於PHP編程 >> php簡單中文分詞系統(1/2)

php簡單中文分詞系統(1/2)

編輯:關於PHP編程

php簡單中文分詞系統。結構:首字散列表、Trie索引樹結點。優點:分詞中,不需預知待查詢詞的長度,沿樹鏈逐字匹配。缺點:構造和維護比較複雜,單詞樹枝多,浪費了一定的空間。

php教程簡單中文分詞系統

結構:首字散列表、trie索引樹結點
優點:分詞中,不需預知待查詢詞的長度,沿樹鏈逐字匹配。
缺點:構造和維護比較復雜,單詞樹枝多,浪費了一定的空間
/**
 * @version 0.1
 * @todo 構造通用的字典算法,並寫了一個簡易的分詞
 * @author [email protected]
 * trie字典樹
 *
 */

/**
 * Byte-wise trie (prefix tree) used as a word dictionary.
 *
 * Every node is an array of the form
 *   array('children' => array(<byte> => <node>, ...), 'isword' => bool)
 * where 'isword' marks that the path from the root to this node spells a
 * complete dictionary word.
 *
 * Strings are walked one byte at a time ($word[$i]), so a multi-byte
 * character simply occupies several consecutive levels of the tree and all
 * positions reported by search() are byte offsets, not character offsets.
 */
class trie
{
        /**
         * Root node of the tree.
         *
         * @var array
         */
        private $trie;

        /**
         * Create an empty dictionary.
         */
        public function __construct()
        {
                // Must be stored on the instance; assigning a local variable
                // here would leave the root node uninitialised.
                $this->trie = array('children' => array(), 'isword' => false);
        }

        /**
         * Add a word to the dictionary.
         *
         * Adding a word that is a prefix of an already stored word (or the
         * other way round) keeps both words. An empty string is ignored.
         *
         * The by-reference return is kept only for compatibility with the
         * historical signature; the returned value is always null.
         *
         * @param string $word word to add
         * @return null
         */
        public function &setword($word='')
        {
                $result = null;
                $word = (string) $word;
                $length = strlen($word);

                $trienode = &$this->trie;
                for ($i = 0; $i < $length; $i++)
                {
                        $character = $word[$i];
                        if (!isset($trienode['children'][$character]))
                        {
                                $trienode['children'][$character] = array('children' => array(), 'isword' => false);
                        }
                        $trienode = &$trienode['children'][$character];
                }

                if ($length > 0)
                {
                        // Only flag the final node; replacing it would throw
                        // away longer words that share this prefix.
                        $trienode['isword'] = true;
                }

                return $result;
        }

        /**
         * Tell whether $word was added to the dictionary as a whole word.
         *
         * A mere prefix of a stored word is not a word, and neither is the
         * empty string.
         *
         * The by-reference return is kept only for compatibility with the
         * historical signature; the result is handed back through a variable
         * so that PHP does not raise "Only variable references should be
         * returned by reference".
         *
         * @param string $word
         * @return bool
         */
        public function &isword($word)
        {
                $result = false;
                $word = (string) $word;
                $length = strlen($word);
                if ($length === 0)
                {
                        return $result;
                }

                $trienode = $this->trie;
                for ($i = 0; $i < $length; $i++)
                {
                        $character = $word[$i];
                        if (!isset($trienode['children'][$character]))
                        {
                                return $result;
                        }
                        $trienode = $trienode['children'][$character];
                }

                $result = !empty($trienode['isword']);
                return $result;
        }

        /**
         * Find every occurrence of every dictionary word inside $text.
         *
         * Each byte offset of $text is tried as a possible word start and the
         * tree is followed as far as the text allows, so overlapping and
         * nested matches are all reported. Results are ordered by start
         * offset and, for the same offset, from the shortest word to the
         * longest.
         *
         * @param string $text
         * @return array list of array('position' => <byte offset>, 'word' => <matched word>)
         */
        public function search($text="")
        {
                $text = (string) $text;
                $textlen = strlen($text);
                $find = array();

                for ($start = 0; $start < $textlen; $start++)
                {
                        // Restart from the root for every start offset; resuming
                        // at the failing byte instead would skip words that begin
                        // inside a partially matched prefix.
                        $trienode = $this->trie;
                        $word = '';
                        for ($i = $start; $i < $textlen; $i++)
                        {
                                $character = $text[$i];
                                if (!isset($trienode['children'][$character]))
                                {
                                        break;
                                }
                                $trienode = $trienode['children'][$character];
                                $word .= $character;
                                if (!empty($trienode['isword']))
                                {
                                        $find[] = array('position' => $start, 'word' => $word);
                                }
                        }
                }

                return $find;
        }
}

1 2

  1. 上一頁:
  2. 下一頁:
Copyright © 程式師世界 All Rights Reserved