python: a way to get an exhaustive, sorted list of keys in a nested dictionary?

2022-12-29 07:13 问答作者：

exhaustive:

- all keys in the dictionary, even if the keys are in a nested dictionary that is a value to a previous-level dictionary key.

sorted:

- this is to ensure the keys are always returned in the same order

The nesting is arbitrarily deep. A non-recursive algorithm is preferred.

level1 = {
    'a'         : 'aaaa',
    'level2_1'  : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'}  },
    'level2_2'  : { 'z': 'zzzzzzz' }
}

Note: dictionary values can include lists (which can have dictionaries as elements),开发者_StackOverflow社区 e.g.

tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}

def _auxallkeys(aset, adict):
  aset.update(adict)
  for d in adict.itervalues():
    if isinstance(d, dict):
       _auxallkeys(aset, d)

def allkeys(adict):
  aset = set()
  _auxallkeys(aset, adict)
  return sorted(aset)

is the obvious (recursive) solution. To eliminate recursion:

def allkeys(adict):
  aset = set()
  pending = [adict]
  while pending:
    d = pending.pop()
    aset.update(d)
    for dd in d.itervalues():
      if isinstance(dd, dict):
         pending.append(dd)
  return sorted(aset)

since the order of processing of the various nested dicts does not matter for this purpose.

Edit: the OP comments whining that it doesn't work if a dict is not nested, but rather in a list (and I replied that it could also be in a tuple, an object with attributes per-instance or per-class [maybe a base class thereof], a shelf, and many other ways to hide dicts around the house;-). If the OP will deign to define precisely what he means by "nested" (obviously not the same meaning as ordinary mortals apply to the word in question), it will probably be easier to help him. Meanwhile, here's a version that covers lists (and tuples, but not generators, instances of many itertools classes, shelves, etc, etc);

def allkeys(adict):
  aset = set()
  pending = [adict]
  pendlis = []

  def do_seq(seq):
    for dd in seq:
      if isinstance(dd, dict):
        pending.append(dd)
      elif isinstance(dd, (list, tuple)):
        pendlis.append(dd)

  while pending or pendlis:
    while pending:
      d = pending.pop()
      aset.update(d)
      do_seq(d.itervalues())
    while pendlis:
      l = pendlis.pop()
      do_seq(l)

  return sorted(aset)

A non-recursive method isn't obvious to me right now. The following works on your original example. Edit: It will now handle dicts within a list within a dict, at least the one within the tricky example cited in the comment to Alex Martelli's answer.

#!/usr/bin/env python
import types

def get_key_list(the_dict, key_list):
    for k, v in (the_dict.iteritems()):
        key_list.append(k)
        if type(v) is types.DictType:
            get_key_list(v, key_list)
        if type(v) is types.ListType:
            for lv in v:
                if type(lv) is types.DictType:
                    get_key_list(lv, key_list)
    return

level1 = {
    'a'         : 'aaaa',
    'level2_1'  : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'}  },
    'level2_2'  : { 'z': 'zzzzzzz' }
}
tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}

key_list = []
get_key_list(level1, key_list)
key_list.sort()
print key_list

key_list = []
get_key_list(tricky, key_list)
key_list.sort()
print key_list

Output:

['a', 'b', 'c', 'd', 'level2_1', 'level2_2', 'level3', 'z']
['category', 'content', 'content']

Here's a non-recursive solution which processes generators as well as lists, tuples and dicts and adds all successive keys if a key appears more than once:

def get_iterator(i):
    if hasattr(i, 'next'):
        # already an iterator - use it as-is!
        return i
    elif hasattr(i, '__iter__') and not isinstance(i, basestring):
        # an iterable type that isn't a string
        return iter(i)
    else: 
        # Can't iterate most other types!
        return None

def get_dict_keys(D):
    LRtn = []
    L = [(D, get_iterator(D))]
    while 1:
        if not L: break
        cur, _iter = L[-1]

        if _iter:
            # Get the next item
            try: 
                i = _iter.next()
            except StopIteration:
                del L[-1]
                continue

        if isinstance(cur, dict): 
            # Process a dict and all subitems
            LRtn.append(i)

            _iter = get_iterator(cur[i])
            if _iter: L.append((cur[i], _iter))
        else:
            # Process generators, lists, tuples and all subitems
            _iter = get_iterator(i)
            if _iter: L.append((i, _iter))

    # Sort and return
    LRtn.sort()
    return LRtn

D = {
    'a'         : 'aaaa',
    'level2_1'  : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd', 'e': 134, 'f': [{'blah': 553}]}  },
    'level2_2'  : { 'z': 'zzzzzzz' },
    'blah2': iter([{'blah3': None}]),
}

print get_dict_keys(D)

EDIT: Increased the speed a bit and made the code shorter.

I also prefer a recursive approach...

#!/usr/bin/env python

def extract_all_keys(structure):
    try:
        list_of_keys = structure.keys()
        for value in structure.values():
            add_all_keys_in_value_to_list(value, list_of_keys)
    except AttributeError:
        list_of_keys = []
    return list_of_keys.sort()


def add_all_keys_in_value_to_list(value, list_of_keys):
    if isinstance(value, dict):
        list_of_keys += extract_all_keys(value)
    elif isinstance(value, (list, tuple)):
        for element in value:
            list_of_keys += extract_all_keys(element)


import unittest

class TestKeys(unittest.TestCase):

    def given_a_structure_of(self, structure):
        self.structure = structure


    def when_keys_are_extracted(self):
        self.list_of_keys = extract_all_keys(self.structure)


    def testEmptyDict(self):
        self.given_a_structure_of({})
        self.when_keys_are_extracted()
        self.assertEquals(self.list_of_keys, [])


    def testOneElement(self):
        self.given_a_structure_of({'a': 'aaaa'})
        self.when_keys_are_extracted()
        self.assertEquals(self.list_of_keys, ['a'])


    def testTwoElementsSorted(self):
        self.given_a_structure_of({
            'z': 'zzzz',
            'a': 'aaaa',
            })
        self.when_keys_are_extracted()
        self.assertEquals(self.list_of_keys, ['a', 'z'])


    def testNestedElements(self):
        self.given_a_structure_of({
            'a': {'aaaa': 'A',},
            'b': {'bbbb': 'B',},
            })
        self.when_keys_are_extracted()
        self.assertEquals(self.list_of_keys, ['a', 'aaaa', 'b', 'bbbb'])


    def testDoublyNestedElements(self):
        self.given_a_structure_of({
            'level2': {'aaaa': 'A',
                'level3': {'bbbb': 'B'}
                }
            })
        self.when_keys_are_extracted()
        self.assertEquals(self.list_of_keys, ['aaaa', 'bbbb', 'level2', 'level3'])


    def testNestedExampleOnStackOverflow(self):
        level1 = {
                'a'         : 'aaaa',
                'level2_1'  : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'}  },
                'level2_2'  : { 'z': 'zzzzzzz' }
                }
        self.given_a_structure_of(level1)
        self.when_keys_are_extracted()
        self.assertEquals(self.list_of_keys, ['a', 'b', 'c', 'd', 'level2_1', 'level2_2', 'level3', 'z'])


    def testListExampleOnStackOverflow(self):
        tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}
        self.given_a_structure_of(tricky)
        self.when_keys_are_extracted()
        self.assertEquals(self.list_of_keys, ['category', 'content', 'content'])


    def testTuplesTreatedLikeLists(self):
        tricky_tuple = {'category': ({'content': 'aaaaa'}, {'content': 'bbbbbb'})}
        self.given_a_structure_of(tricky_tuple)
        self.when_keys_are_extracted()
        self.assertEquals(self.list_of_keys, ['category', 'content', 'content'])


    def testCanHandleString(self):
        self.given_a_structure_of('keys')
        self.when_keys_are_extracted()
        self.assertEquals(self.list_of_keys, [])

if __name__ == '__main__':
    unittest.main()

I think it is better to use a recursive method. My code is in the following.

level1 = {
    'a'         : 'aaaa',
    'level2_1'  : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'}  },
    'level2_2'  : { 'z': 'zzzzzzz' }
}

all_keys=[]       # a global list to store all the keys in level1

def depth ( dict ):
    for k in dict:
        if type(dict[k]) == type(dict):     #judge the type of elements in dictionary  
            depth(dict[k])                  # recursive 
        else:
            all_keys.append(k)

depth(level1)

print all_keys

继续阅读：python

python: a way to get an exhaustive, sorted list of keys in a nested dictionary?

更多精彩内容

精彩评论

最新问答

央视是哪个频道？

请问买过的朋友，舒提啦旅行箱实际使用体验如何？？

检查不孕不育需要的费用？

海信ULED电视画质有什么不同的地方?？

钉子可以挂的住画框幕布吗？

问答排行榜

河神2九牛入海钓河妖是第几集河妖什么来历可活吞牛？

Best solution for private video database [closed]

王昌瑞《潜梦追凶》剧组庆生新锐演员未来可期？

Is it allowed to ask users to enter credit card details for own payment method?

Escaping "<" in Perl-generated XML