Find the word which all character is matching with other words in python
like umbellar = umbrella both are equal words.
Input = ["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu","eyra","egma","game","leam","amel","year","meal","yare","gun","alme","ung","male","lame","mela","mage" ]
so output should be :
output=[ ["umbellar","umbrella"], ["ago","goa"], ["aery","ayre","eyra","yare","year"开发者_开发问答], ["alem","alme","amel","lame","leam","male","meal","mela"], ["gnu","gun","ung"] ["egma","game","mage"], ]
from itertools import groupby
def group_words(word_list):
sorted_words = sorted(word_list, key=sorted)
grouped_words = groupby(sorted_words, sorted)
for key, words in grouped_words:
group = list(words)
if len(group) > 1:
yield group
Example:
>>> group_words(["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu","eyra","egma","game","leam","amel","year","meal","yare","gun","alme","ung","male","lame","mela","mage" ])
<generator object group_words at 0x0297B5F8>
>>> list(_)
[['umbellar', 'umbrella'], ['egma', 'game', 'mage'], ['alem', 'leam', 'amel', 'meal', 'alme', 'male', 'lame', 'mela'], ['aery', 'ayre', 'eyra', 'year', 'yare'], ['goa', 'ago'], ['gnu', 'gun', 'ung']]
They're not equal words, they're anagrams.
Anagrams can be found by sorting by character:
sorted('umbellar') == sorted('umbrella')
collections.defaultdict
comes in handy:
from collections import defaultdict
input = ["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu",
"eyra","egma","game","leam","amel","year","meal","yare","gun",
"alme","ung","male","lame","mela","mage" ]
D = defaultdict(list)
for i in input:
key = ''.join(sorted(input))
D[key].append(i)
output = D.values()
And output is [['umbellar', 'umbrella'], ['goa', 'ago'], ['gnu', 'gun', 'ung'], ['alem', 'leam', 'amel', 'meal', 'alme', 'male', 'lame', 'mela'], ['egma', 'game', 'mage'], ['aery', 'ayre', 'eyra', 'year', 'yare']]
As others point out you're looking for all the groups of anagrams in your list of words. here you have a possible solution. This algorithm looks for candidates and selects one (first element) as the canonical word, deletes the rest as possible words because anagrams are transitive and once you find that a word belongs to an anagram group you don't need to recompute it again.
input = ["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu",
"eyra","egma","game","leam","amel","year","meal","yare","gun",
"alme","ung","male","lame","mela","mage" ]
res = dict()
for word in input:
res[word]=[word]
for word in input:
#the len test is just to avoid sorting and comparing words of different len
candidates = filter(lambda x: len(x) == len(word) and\
sorted(x) == sorted(word),res.keys())
if len(candidates):
canonical = candidates[0]
for c in candidates[1:]:
#we delete all candidates expect the canonical/
del res[c]
#we add the others to the canonical member
res[canonical].append(c)
print res.values()
This algth outputs ...
[['year', 'ayre', 'aery', 'yare', 'eyra'], ['umbellar', 'umbrella'],
['lame', 'leam', 'mela', 'amel', 'alme', 'alem', 'male', 'meal'],
['goa', 'ago'], ['game', 'mage', 'egma'], ['gnu', 'gun', 'ung']]
the answer of Shang is right......but I have been challenged to do same thing without using .... 'groupby()' ....... here it is..... adding the print statements will help you in debugging the code and runtime output....
def group_words(word_list):
global new_list
list1 = []
_list0 = []
_list1 = []
new_list = []
for elm in word_list:
list_elm = list(elm)
list1.append(list(list_elm))
for ee in list1:
ee = sorted(ee)
ee = ''.join(ee)
_list1.append(ee)
_list1 = list(set(_list1))
for _e1 in _list1:
for e0 in word_list:
if len(e0) == len(_e1):
list_e0 = ''.join(sorted(e0))
if _e1 == list_e0:
_list0.append(e0)
_list0 = list(_list0)
new_list.append(_list0)
_list0 = []
return new_list
and output is
[['umbellar', 'umbrella'], ['goa', 'ago'], ['gnu', 'gun', 'ung'], ['alem', 'leam', 'amel', 'meal', 'alme', 'male', 'lame', 'mela'], ['egma', 'game', 'mage'], ['aery', 'ayre', 'eyra', 'year', 'yare']]
精彩评论