python: a way to get an exhaustive, sorted list of keys in a nested dictionary?
exhaustive:
- all keys in the dictionary, even if the keys are in a nested dictionary that is a value to a previous-level dictionary key.sorted:
- this is to ensure the keys are always returned in the same orderThe nesting is arbitrarily deep. A non-recursive algorithm is preferred.
level1 = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
'level2_2' : { 'z': 'zzzzzzz' }
}
Note: dictionary values can include lists (which can have dictionaries as elements),开发者_StackOverflow社区 e.g.
tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}
def _auxallkeys(aset, adict):
aset.update(adict)
for d in adict.itervalues():
if isinstance(d, dict):
_auxallkeys(aset, d)
def allkeys(adict):
aset = set()
_auxallkeys(aset, adict)
return sorted(aset)
is the obvious (recursive) solution. To eliminate recursion:
def allkeys(adict):
aset = set()
pending = [adict]
while pending:
d = pending.pop()
aset.update(d)
for dd in d.itervalues():
if isinstance(dd, dict):
pending.append(dd)
return sorted(aset)
since the order of processing of the various nested dicts does not matter for this purpose.
Edit: the OP comments whining that it doesn't work if a dict is not nested, but rather in a list (and I replied that it could also be in a tuple, an object with attributes per-instance or per-class [maybe a base class thereof], a shelf, and many other ways to hide dicts around the house;-). If the OP will deign to define precisely what he means by "nested" (obviously not the same meaning as ordinary mortals apply to the word in question), it will probably be easier to help him. Meanwhile, here's a version that covers lists (and tuples, but not generators, instances of many itertools classes, shelves, etc, etc);
def allkeys(adict):
aset = set()
pending = [adict]
pendlis = []
def do_seq(seq):
for dd in seq:
if isinstance(dd, dict):
pending.append(dd)
elif isinstance(dd, (list, tuple)):
pendlis.append(dd)
while pending or pendlis:
while pending:
d = pending.pop()
aset.update(d)
do_seq(d.itervalues())
while pendlis:
l = pendlis.pop()
do_seq(l)
return sorted(aset)
A non-recursive method isn't obvious to me right now. The following works on your original example. Edit: It will now handle dicts within a list within a dict, at least the one within the tricky example cited in the comment to Alex Martelli's answer.
#!/usr/bin/env python
import types
def get_key_list(the_dict, key_list):
for k, v in (the_dict.iteritems()):
key_list.append(k)
if type(v) is types.DictType:
get_key_list(v, key_list)
if type(v) is types.ListType:
for lv in v:
if type(lv) is types.DictType:
get_key_list(lv, key_list)
return
level1 = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
'level2_2' : { 'z': 'zzzzzzz' }
}
tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}
key_list = []
get_key_list(level1, key_list)
key_list.sort()
print key_list
key_list = []
get_key_list(tricky, key_list)
key_list.sort()
print key_list
Output:
['a', 'b', 'c', 'd', 'level2_1', 'level2_2', 'level3', 'z']
['category', 'content', 'content']
Here's a non-recursive solution which processes generators as well as lists, tuples and dicts and adds all successive keys if a key appears more than once:
def get_iterator(i):
if hasattr(i, 'next'):
# already an iterator - use it as-is!
return i
elif hasattr(i, '__iter__') and not isinstance(i, basestring):
# an iterable type that isn't a string
return iter(i)
else:
# Can't iterate most other types!
return None
def get_dict_keys(D):
LRtn = []
L = [(D, get_iterator(D))]
while 1:
if not L: break
cur, _iter = L[-1]
if _iter:
# Get the next item
try:
i = _iter.next()
except StopIteration:
del L[-1]
continue
if isinstance(cur, dict):
# Process a dict and all subitems
LRtn.append(i)
_iter = get_iterator(cur[i])
if _iter: L.append((cur[i], _iter))
else:
# Process generators, lists, tuples and all subitems
_iter = get_iterator(i)
if _iter: L.append((i, _iter))
# Sort and return
LRtn.sort()
return LRtn
D = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd', 'e': 134, 'f': [{'blah': 553}]} },
'level2_2' : { 'z': 'zzzzzzz' },
'blah2': iter([{'blah3': None}]),
}
print get_dict_keys(D)
EDIT: Increased the speed a bit and made the code shorter.
I also prefer a recursive approach...
#!/usr/bin/env python
def extract_all_keys(structure):
try:
list_of_keys = structure.keys()
for value in structure.values():
add_all_keys_in_value_to_list(value, list_of_keys)
except AttributeError:
list_of_keys = []
return list_of_keys.sort()
def add_all_keys_in_value_to_list(value, list_of_keys):
if isinstance(value, dict):
list_of_keys += extract_all_keys(value)
elif isinstance(value, (list, tuple)):
for element in value:
list_of_keys += extract_all_keys(element)
import unittest
class TestKeys(unittest.TestCase):
def given_a_structure_of(self, structure):
self.structure = structure
def when_keys_are_extracted(self):
self.list_of_keys = extract_all_keys(self.structure)
def testEmptyDict(self):
self.given_a_structure_of({})
self.when_keys_are_extracted()
self.assertEquals(self.list_of_keys, [])
def testOneElement(self):
self.given_a_structure_of({'a': 'aaaa'})
self.when_keys_are_extracted()
self.assertEquals(self.list_of_keys, ['a'])
def testTwoElementsSorted(self):
self.given_a_structure_of({
'z': 'zzzz',
'a': 'aaaa',
})
self.when_keys_are_extracted()
self.assertEquals(self.list_of_keys, ['a', 'z'])
def testNestedElements(self):
self.given_a_structure_of({
'a': {'aaaa': 'A',},
'b': {'bbbb': 'B',},
})
self.when_keys_are_extracted()
self.assertEquals(self.list_of_keys, ['a', 'aaaa', 'b', 'bbbb'])
def testDoublyNestedElements(self):
self.given_a_structure_of({
'level2': {'aaaa': 'A',
'level3': {'bbbb': 'B'}
}
})
self.when_keys_are_extracted()
self.assertEquals(self.list_of_keys, ['aaaa', 'bbbb', 'level2', 'level3'])
def testNestedExampleOnStackOverflow(self):
level1 = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
'level2_2' : { 'z': 'zzzzzzz' }
}
self.given_a_structure_of(level1)
self.when_keys_are_extracted()
self.assertEquals(self.list_of_keys, ['a', 'b', 'c', 'd', 'level2_1', 'level2_2', 'level3', 'z'])
def testListExampleOnStackOverflow(self):
tricky = {'category': [{'content': 'aaaaa'}, {'content': 'bbbbbb'}]}
self.given_a_structure_of(tricky)
self.when_keys_are_extracted()
self.assertEquals(self.list_of_keys, ['category', 'content', 'content'])
def testTuplesTreatedLikeLists(self):
tricky_tuple = {'category': ({'content': 'aaaaa'}, {'content': 'bbbbbb'})}
self.given_a_structure_of(tricky_tuple)
self.when_keys_are_extracted()
self.assertEquals(self.list_of_keys, ['category', 'content', 'content'])
def testCanHandleString(self):
self.given_a_structure_of('keys')
self.when_keys_are_extracted()
self.assertEquals(self.list_of_keys, [])
if __name__ == '__main__':
unittest.main()
I think it is better to use a recursive method. My code is in the following.
level1 = {
'a' : 'aaaa',
'level2_1' : {'b': 'bbbbb', 'level3': {'c': 'cccc', 'd': 'dddddd'} },
'level2_2' : { 'z': 'zzzzzzz' }
}
all_keys=[] # a global list to store all the keys in level1
def depth ( dict ):
for k in dict:
if type(dict[k]) == type(dict): #judge the type of elements in dictionary
depth(dict[k]) # recursive
else:
all_keys.append(k)
depth(level1)
print all_keys
精彩评论