开发者

Find the word which all character is matching with other words in python

like umbellar = umbrella both are equal words.

Input = ["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu","eyra","egma","game","leam","amel","year","meal","yare","gun","alme","ung","male","lame","mela","mage" ]

so output should be :

output=[ ["umbellar","umbrella"], ["ago","goa"], ["aery","ayre","eyra","yare","year"开发者_开发问答], ["alem","alme","amel","lame","leam","male","meal","mela"], ["gnu","gun","ung"] ["egma","game","mage"], ]



from itertools import groupby

def group_words(word_list):
    sorted_words = sorted(word_list, key=sorted)
    grouped_words = groupby(sorted_words, sorted)
    for key, words in grouped_words:
        group = list(words)
        if len(group) > 1:
            yield group

Example:

>>> group_words(["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu","eyra","egma","game","leam","amel","year","meal","yare","gun","alme","ung","male","lame","mela","mage" ])
<generator object group_words at 0x0297B5F8>
>>> list(_)
[['umbellar', 'umbrella'], ['egma', 'game', 'mage'], ['alem', 'leam', 'amel', 'meal', 'alme', 'male', 'lame', 'mela'], ['aery', 'ayre', 'eyra', 'year', 'yare'], ['goa', 'ago'], ['gnu', 'gun', 'ung']]


They're not equal words, they're anagrams.

Anagrams can be found by sorting by character:

sorted('umbellar') == sorted('umbrella')


collections.defaultdict comes in handy:

from collections import defaultdict

input = ["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu",
"eyra","egma","game","leam","amel","year","meal","yare","gun",
"alme","ung","male","lame","mela","mage" ]

D = defaultdict(list)
for i in input:
    key = ''.join(sorted(input))
    D[key].append(i)

output = D.values()

And output is [['umbellar', 'umbrella'], ['goa', 'ago'], ['gnu', 'gun', 'ung'], ['alem', 'leam', 'amel', 'meal', 'alme', 'male', 'lame', 'mela'], ['egma', 'game', 'mage'], ['aery', 'ayre', 'eyra', 'year', 'yare']]


As others point out you're looking for all the groups of anagrams in your list of words. here you have a possible solution. This algorithm looks for candidates and selects one (first element) as the canonical word, deletes the rest as possible words because anagrams are transitive and once you find that a word belongs to an anagram group you don't need to recompute it again.

input = ["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu",
"eyra","egma","game","leam","amel","year","meal","yare","gun",
"alme","ung","male","lame","mela","mage" ]
res = dict()
for word in input:
    res[word]=[word]
for word in input:
    #the len test is just to avoid sorting and comparing words of different len
    candidates = filter(lambda x: len(x) == len(word) and\
                                  sorted(x) == sorted(word),res.keys())
    if len(candidates):
        canonical = candidates[0]
        for c in candidates[1:]:
            #we delete all candidates expect the canonical/
            del res[c]
            #we add the others to the canonical member
            res[canonical].append(c)
print res.values()

This algth outputs ...

[['year', 'ayre', 'aery', 'yare', 'eyra'], ['umbellar', 'umbrella'],
 ['lame', 'leam', 'mela', 'amel', 'alme', 'alem', 'male', 'meal'],
 ['goa', 'ago'], ['game', 'mage', 'egma'], ['gnu', 'gun', 'ung']]


the answer of Shang is right......but I have been challenged to do same thing without using .... 'groupby()' ....... here it is..... adding the print statements will help you in debugging the code and runtime output....

def group_words(word_list):
    global new_list
    list1 = [] 
    _list0 = []
    _list1 = []
    new_list = []
    for elm in word_list:
        list_elm = list(elm)
        list1.append(list(list_elm))
    for ee in list1:
        ee = sorted(ee)
        ee = ''.join(ee)
        _list1.append(ee)   
    _list1 = list(set(_list1))
    for _e1 in _list1:
        for e0 in word_list:
            if  len(e0) == len(_e1):
                list_e0 = ''.join(sorted(e0))
                if _e1 == list_e0:
                    _list0.append(e0)
                    _list0 = list(_list0)
        new_list.append(_list0)
        _list0 = []
    return new_list

and output is

[['umbellar', 'umbrella'], ['goa', 'ago'], ['gnu', 'gun', 'ung'], ['alem', 'leam', 'amel', 'meal', 'alme', 'male', 'lame', 'mela'], ['egma', 'game', 'mage'], ['aery', 'ayre', 'eyra', 'year', 'yare']]
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜