Find the word which all character is matching with other words in python

2023-02-09 05:02 问答作者：

like umbellar = umbrella both are equal words.

Input = ["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu","eyra","egma","game","leam","amel","year","meal","yare","gun","alme","ung","male","lame","mela","mage" ]

so output should be :

output=[ ["umbellar","umbrella"], ["ago","goa"], ["aery","ayre","eyra","yare","year"开发者_开发问答], ["alem","alme","amel","lame","leam","male","meal","mela"], ["gnu","gun","ung"] ["egma","game","mage"], ]


from itertools import groupby

def group_words(word_list):
    sorted_words = sorted(word_list, key=sorted)
    grouped_words = groupby(sorted_words, sorted)
    for key, words in grouped_words:
        group = list(words)
        if len(group) > 1:
            yield group

Example:

>>> group_words(["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu","eyra","egma","game","leam","amel","year","meal","yare","gun","alme","ung","male","lame","mela","mage" ])
<generator object group_words at 0x0297B5F8>
>>> list(_)
[['umbellar', 'umbrella'], ['egma', 'game', 'mage'], ['alem', 'leam', 'amel', 'meal', 'alme', 'male', 'lame', 'mela'], ['aery', 'ayre', 'eyra', 'year', 'yare'], ['goa', 'ago'], ['gnu', 'gun', 'ung']]

They're not equal words, they're anagrams.

Anagrams can be found by sorting by character:

sorted('umbellar') == sorted('umbrella')

collections.defaultdict comes in handy:

from collections import defaultdict

input = ["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu",
"eyra","egma","game","leam","amel","year","meal","yare","gun",
"alme","ung","male","lame","mela","mage" ]

D = defaultdict(list)
for i in input:
    key = ''.join(sorted(input))
    D[key].append(i)

output = D.values()

And output is [['umbellar', 'umbrella'], ['goa', 'ago'], ['gnu', 'gun', 'ung'], ['alem', 'leam', 'amel', 'meal', 'alme', 'male', 'lame', 'mela'], ['egma', 'game', 'mage'], ['aery', 'ayre', 'eyra', 'year', 'yare']]

As others point out you're looking for all the groups of anagrams in your list of words. here you have a possible solution. This algorithm looks for candidates and selects one (first element) as the canonical word, deletes the rest as possible words because anagrams are transitive and once you find that a word belongs to an anagram group you don't need to recompute it again.

input = ["umbellar","goa","umbrella","ago","aery","alem","ayre","gnu",
"eyra","egma","game","leam","amel","year","meal","yare","gun",
"alme","ung","male","lame","mela","mage" ]
res = dict()
for word in input:
    res[word]=[word]
for word in input:
    #the len test is just to avoid sorting and comparing words of different len
    candidates = filter(lambda x: len(x) == len(word) and\
                                  sorted(x) == sorted(word),res.keys())
    if len(candidates):
        canonical = candidates[0]
        for c in candidates[1:]:
            #we delete all candidates expect the canonical/
            del res[c]
            #we add the others to the canonical member
            res[canonical].append(c)
print res.values()

This algth outputs ...

[['year', 'ayre', 'aery', 'yare', 'eyra'], ['umbellar', 'umbrella'],
 ['lame', 'leam', 'mela', 'amel', 'alme', 'alem', 'male', 'meal'],
 ['goa', 'ago'], ['game', 'mage', 'egma'], ['gnu', 'gun', 'ung']]

the answer of Shang is right......but I have been challenged to do same thing without using .... 'groupby()' ....... here it is..... adding the print statements will help you in debugging the code and runtime output....

def group_words(word_list):
    global new_list
    list1 = [] 
    _list0 = []
    _list1 = []
    new_list = []
    for elm in word_list:
        list_elm = list(elm)
        list1.append(list(list_elm))
    for ee in list1:
        ee = sorted(ee)
        ee = ''.join(ee)
        _list1.append(ee)   
    _list1 = list(set(_list1))
    for _e1 in _list1:
        for e0 in word_list:
            if  len(e0) == len(_e1):
                list_e0 = ''.join(sorted(e0))
                if _e1 == list_e0:
                    _list0.append(e0)
                    _list0 = list(_list0)
        new_list.append(_list0)
        _list0 = []
    return new_list

and output is

[['umbellar', 'umbrella'], ['goa', 'ago'], ['gnu', 'gun', 'ung'], ['alem', 'leam', 'amel', 'meal', 'alme', 'male', 'lame', 'mela'], ['egma', 'game', 'mage'], ['aery', 'ayre', 'eyra', 'year', 'yare']]

继续阅读：python

Find the word which all character is matching with other words in python

更多精彩内容

精彩评论

最新问答

央视是哪个频道？

请问买过的朋友，舒提啦旅行箱实际使用体验如何？？

检查不孕不育需要的费用？

海信ULED电视画质有什么不同的地方?？

钉子可以挂的住画框幕布吗？

问答排行榜

河神2九牛入海钓河妖是第几集河妖什么来历可活吞牛？

性激素六项检查的最佳时间是多久？多少钱？？

Easiest way to get words of one line from istream into a vector?

《梦在燃烧 (《三国演义》动画片主题曲)》MP3歌词-汤子星？

抽烟只抽炫赫门？