Optimizing Trie implementation
For no reason other than fun I implemented a Trie today. At the moment it supports add() and search(), remove() should also be implemented but I think that's fairly straight forward.
It is fully functional, but filling the Trie with data takes a little too much for my taste. I'm using this list as datasource: http://www.isc.ro/lists/twl06.zip (found somewhere else on SO). It takes ~11s to load. My initial implementation took ~15s so I already gave it a nice performance boost, but I'm still not satisfied :)
My question is: what else could give me a (substantial) performance boost? I'm not bound by this design, a complete overhaul is acceptable.
class Trie
{
private $trie;
public function __construct(TrieNode $trie = null)
{
if($trie !== null) $this->trie = $trie;
else $this->trie = new TrieNode();
$this->counter = 0;
}
public function add($value, $val = null)
{
$str = '';
$trie_ref = $this->trie;
foreach(str_split($value) as $char)
{
$str .= $char;
$trie_ref = $trie_ref->addNode($str);
}
$trie_ref->value = $val;
return true;
}
public function search($value, $only_words = false)
{
if($value === '') return $this->trie;
$trie_ref = $this->trie;
$str = '';
foreach(str_split($value) as $char)
{
$str .= $char;
if($trie_ref = $trie_ref->getNode($str))
{
if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref));
continue;
}
return false;
}
return false;
}
public function extractWords(TrieNode $trie)
{
$res = array();
foreach($trie->getChildren() as $child)
{
if($child->value !== null) $res[] = $child->value;
if($child->hasChildren()) $res = array_merge($res, $this->extractWords($child));
}
return $res;
}
}
class TrieNode
{
public $value;
protected $children = array();
public function addNode($index)
{
if(isset($this-&g开发者_如何学Pythont;children[$index])) return $this->children[$index];
return $this->children[$index] = new self();
}
public function getNode($index)
{
return (isset($this->children[$index]) ? $this->children[$index] : false);
}
public function getChildren()
{
return $this->children;
}
public function hasChildren()
{
return count($this->children)>0;
}
}
Don't know php but,
in the following methods:
public function add($value, $val = null)
{
$str = '';
$trie_ref = $this->trie;
foreach(str_split($value) as $char)
{
$str .= $char;
$trie_ref = $trie_ref->addNode($str);
}
$trie_ref->value = $val;
return true;
}
public function search($value, $only_words = false)
{
if($value === '') return $this->trie;
$trie_ref = $this->trie;
$str = '';
foreach(str_split($value) as $char)
{
$str .= $char;
if($trie_ref = $trie_ref->getNode($str))
{
if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref));
continue;
}
return false;
}
return false;
}
Why do you even need the $str .= $char
(which I suppose is append)? This itself changes your O(n) time addition/searching to Omega(n^2) (n is length of $value
) instead of O(n).
In a trie, you usually walk the trie while walking the string i.e you find the next node based on the current character, rather than the current prefix.
I suppose this implementation is for a Key|value type of insertion and lookup? Here is one that handles [English] words.
class Trie {
static function insert_word(Node $root, $text)
{
$v = $root;
foreach(str_split($text) as $char) {
$next = $v->children[$char];
if ($next === null)
{
$v->children[$char] = $next = new Node();
}
$v = $next;
}
$v->leaf = true;
}
static function get_words_sorted(Node $node, $text)
{
$res = array();
for($ch = 0; $ch < 128; $ch++) {
$child = $node->children[chr($ch)];
if ($child !== null)
{
$res = array_merge($res, Trie::get_words_sorted($child, $text . chr($ch)));
}
}
if ($node->leaf === true)
{
$res[] = $text;
}
return $res;
}
static function search(Node $root, $text)
{
$v = $root;
while($v !== null)
{
foreach(str_split($text) as $char) {
$next = $v->children[$char];
if ($next === null)
{
return false;
}
else
{
$v = $next;
}
}
if($v->leaf === true)
{
return true;
}
else
{
return false;
}
}
return false;
}
}
class Node {
public $children;
public $leaf;
function __construct()
{
$children = Array();
}
}
Example usage
$root = new Node();
$words = Array("an", "ant", "all", "allot", "alloy", "aloe", "are", "ate", "be");
for ($i = 0; $i < sizeof($words); $i++)
{
Trie::insert_word($root, $words[$i]);
}
$search_words = array("alloy", "ant", "bee", "aren't", "allot");
foreach($search_words as $word)
{
if(Trie::search($root, $word) === true)
{
echo $word . " IS in my dictionary<br/>";
}
else
{
echo $word . " is NOT in my dictionary <br/>";
}
}
精彩评论