Positional Rankings and Dealing with Ties in Python
(I apologize that previous versions of this question displayed the wrong function that I need to fix, this has been remedied and I hope the question makes a little more sense now.)
I ha开发者_Python百科ve a list of objects with scores, and I'm attempting to assign rank to them based on those scores. Below is basically how I output my data.
sorted_scores = [
('Apolo Ohno', 0),
('Shanie Davis', -1),
('Bodie Miller', -2),
('Lindsay Vohn', -3),
('Shawn White', -3),
('Bryan Veloso', -4)
]
I have a tie. Now, the function that assigns positions to the objects above right now is a simple for loop that just assigns the value of i
as the object's final position.
positions = {}
i = 1
for key, value in sorted_list:
# Since in my codebase the strings are IDs, I use the key to fetch the object.
if value is not None:
positions[key] = i
i += 1
So that'll obviously return:
positions = {
'Apolo Ohno': 1,
'Shanie Davis': 2,
'Bodie Miller': 3,
'Lindsay Vohn': 4,
'Shawn White': 5,
'Bryan Veloso': 6
}
Hopefully that makes some sense. The meat of the question is that loop. What makes more sense is if it returns them like so:
positions = {
'Apolo Ohno': 1,
'Shanie Davis': 2,
'Bodie Miller': 3,
'Lindsay Vohn': 4, # Same value.
'Shawn White': 4, # Same value.
'Bryan Veloso': 6
}
How would I edit the function above to do that, keeping in mind that I could have any number of ties at any given time depending on how many of my members ranked said object? The highest rank should be 1, so it can be displayed as such: <rank>/<total # of people>
Thanks in advance. :)
>>> sorted_scores = [
... ('Apolo Ohno', 0),
... ('Shanie Davis', -1),
... ('Bodie Miller', -2),
... ('Lindsay Vohn', -3),
... ('Shawn White', -3),
... ('Bryan Veloso',-4)
... ]
>>>
>>> res = {}
>>> prev = None
>>> for i,(k,v) in enumerate(sorted_scores):
... if v!=prev:
... place,prev = i+1,v
... res[k] = place
...
>>> print res
{'Apolo Ohno': 1, 'Bryan Veloso': 6, 'Shanie Davis': 2, 'Lindsay Vohn': 4, 'Bodie Miller': 3, 'Shawn White': 4}
Remember that dicts are unordered, so to iterate in order of place, you need to do this
>>> from operator import itemgetter
>>> print sorted(res.items(),key=itemgetter(1))
[('Apolo Ohno', 1), ('Shanie Davis', 2), ('Bodie Miller', 3), ('Lindsay Vohn', 4), ('Shawn White', 4), ('Bryan Veloso', 6)]
=== Update after change/clarification of specs ===
# coding: ascii
def ranks_from_scores(sorted_scores):
"""sorted_scores: a list of tuples (object_id, score), sorted by score DESCENDING
return a mapping of object IDs to ranks
"""
ranks = {}
previous_score = object()
for index, (obj_id, score) in enumerate(sorted_scores):
if score != previous_score:
previous_score = score
rank = index + 1
ranks[obj_id] = rank
return ranks
from operator import itemgetter
import pprint
scores0 = dict([
('Apolo Ohno', 0),
('Shanie Davis', -1),
('Bodie Miller', -2),
('Lindsay Vohn', -3),
('Shawn White', -3)
])
scores1 = {
'lorem': 100,
'ipsum': 200,
'dolor': 300,
'sit': 300,
'amet': 300,
'quia': 400,
'consectetur': 500,
'adipiscing': 500,
'elit': 600,
}
scores2 = {
'lorem': 100,
'ipsum': 200,
'dolor': 300,
'sit': 300,
'amet': 300,
'quia': 400,
'consectetur': 500,
'adipiscing': 500,
'elit': 6000,
}
import pprint
funcs = (ranks_from_scores, ) # Watch this space!
tests = (scores0, scores1, scores2)
for test in tests:
print
test_list = sorted(test.items(), key=itemgetter(1), reverse=True)
print "Input:", test_list
for func in funcs:
result = func(test_list)
print "%s ->" % func.__name__
pprint.pprint(result)
Results:
Input: [('Apolo Ohno', 0), ('Shanie Davis', -1), ('Bodie Miller', -2), ('Lindsay
Vohn', -3), ('Shawn White', -3)]
ranks_from_scores ->
{'Apolo Ohno': 1,
'Bodie Miller': 3,
'Lindsay Vohn': 4,
'Shanie Davis': 2,
'Shawn White': 4}
Input: [('elit', 600), ('consectetur', 500), ('adipiscing', 500), ('quia', 400),
('dolor', 300), ('sit', 300), ('amet', 300), ('ipsum', 200), ('lorem', 100)]
ranks_from_scores ->
{'adipiscing': 2,
'amet': 5,
'consectetur': 2,
'dolor': 5,
'elit': 1,
'ipsum': 8,
'lorem': 9,
'quia': 4,
'sit': 5}
Input: [('elit', 6000), ('consectetur', 500), ('adipiscing', 500), ('quia', 400)
, ('dolor', 300), ('sit', 300), ('amet', 300), ('ipsum', 200), ('lorem', 100)]
ranks_from_scores ->
{'adipiscing': 2,
'amet': 5,
'consectetur': 2,
'dolor': 5,
'elit': 1,
'ipsum': 8,
'lorem': 9,
'quia': 4,
'sit': 5}
=== original submission ===
This code assumes that you really want the highest score to get rank 1, not the lowest score getting rank 1 (or 0!).
# coding: ascii
def ranks_from_scores(scores, debug=False):
"""scores (a mapping of object IDs to scores)
return a mapping of object IDs to ranks
"""
alist = [(v, k) for k, v in scores.items()]
alist.sort(reverse=True)
if debug: print 'alist:', alist
bdict = {}
previous_score = object()
for posn, (score, obj_id) in enumerate(alist):
if score != previous_score:
previous_score = score
rank = posn + 1
bdict[obj_id] = rank
if debug:
print 'bdict:', bdict
blist = [(v, k) for k, v in bdict.items()]
print 'blist:', sorted(blist)
return bdict
ranks_from_scores(
{'q': 10, 'w': 20, 'e': 20, 'r': 20, 't': 30},
debug=True,
)
Output:
alist: [(30, 't'), (20, 'w'), (20, 'r'), (20, 'e'), (10, 'q')]
bdict: {'q': 5, 'r': 2, 'e': 2, 't': 1, 'w': 2}
blist: [(1, 't'), (2, 'e'), (2, 'r'), (2, 'w'), (5, 'q')]
The way to do this is not to calculate the element's position is some arbitrary sequence, but rather to calculate how many other elements have a better score.
EDIT:
By popular demand, O(n)'ed and everything:
positions = {}
cur_score = None # Score we're examining
cur_count = 0 # Number of others that we've seen with this score
for ix, (name, score) in enumerate(sorted_scores):
if score == cur_score: # Same score for this player as previous
cur_count += 1
else: # Different score from before
cur_score = score
cur_count = 0
positions[name] = ix - cur_count + 1 # Add 1 because ix is 0-based
print positions
Looks like you can use the sorted
and enumerate
builtins, the groupby
method from itertools
and the itemgetter
method from operator
. Assumes higher scores are better... (if lower scores are better, change reverse=True
to reverse=False
)
>>> from itertools import groupby
>>> from operator import itemgetter
>>> scores = {
... 'lorem': 100,
... 'ipsum': 200,
... 'dolor': 300,
... 'sit': 300,
... 'amet': 300,
... 'quia': 400,
... 'consectetur': 500,
... 'adipiscing': 500,
... 'elit': 600,
... }
>>> sorted_items = sorted(scores.items(), key=itemgetter(1), reverse=True)
>>> groups = groupby(sorted_items, itemgetter(1))
>>> for rank, (score, items) in enumerate(groups):
... print rank+1, map(itemgetter(0), items)
...
1 ['elit']
2 ['consectetur', 'adipiscing']
3 ['quia']
4 ['dolor', 'sit', 'amet']
5 ['ipsum']
6 ['lorem']
Solution
Here's one simple way to do it by modifying your code a little rather than importing modules:
prev = None
rank = 0
incr = 1
positions = {}
for key, value in sorted_list:
if value is not None:
if value != prev:
rank += incr
incr = 1
else:
incr += 1
positions[key] = rank
prev = value
A Test
For
sorted_list = [
('Apolo Ohno', 0),
('Shanie Davis', -1),
('Bodie Miller', -2),
('Lindsay Vohn', -3),
('Shawn White', -3),
('Bryan Veloso',-4)
]
I get positions as:
{'Apolo Ohno': 1,
'Shanie Davis': 2,
'Bodie Miller': 3,
'Lindsay Vohn': 4,
'Shawn White': 4,
'Bryan Veloso': 6}
which I think is what you are going for even though you aren't quite clear about whether there should be a 6 after the two 4's.
Here's an approach that combines aspects of a few of the other solutions into a flexible generator function.
def rank_sorted(sequence, start=1, key=None, reverse=True):
"""A combination of `enumerate` and `sorted` iterators that deals
with tied ranks.
"""
previous_value = object() # won't compare equal to anything
sorted_iterator = sorted(sequence, key=key, reverse=reverse)
for index, item in enumerate(sorted_iterator, start=start):
# use key function to choose value if given
if key is None:
value = item
else:
value = key(item)
# only update rank when sort value changes
if value != previous_value:
previous_value = value
rank = index
yield rank, item
You can call with different values for start
, key
, and reverse
to allow for ranks to start at 0 or 1, to pass a custom key function (like itemgetter(1)
for sorting dictionaries by value), and to easily switch to having lower scores considered higher ranking. Using the example in the original question:
from operator import itemgetter
sorted_scores = [
('Apolo Ohno', 0),
('Shanie Davis', -1),
('Bodie Miller', -2),
('Lindsay Vohn', -3),
('Shawn White', -3),
('Bryan Veloso', -4)
]
higher_is_better = dict(
(name, rank)
for rank, (name, score)
in rank_sorted(sorted_scores, key=itemgetter(1))
)
# {'Apolo Ohno': 1, 'Bryan Veloso': 6, 'Shanie Davis': 2, 'Lindsay Vohn': 4, 'Bodie Miller': 3, 'Shawn White': 4}
lower_is_better = dict(
(name, rank)
for rank, (name, score)
in rank_sorted(sorted_scores, key=itemgetter(1), reverse=False)
)
# {'Apolo Ohno': 6, 'Bryan Veloso': 1, 'Shanie Davis': 5, 'Lindsay Vohn': 2, 'Bodie Miller': 4, 'Shawn White': 2}
Same as the best answer, just keeping the ordinals instead of skiping to the next position at the ranking: i.e.: ranking = [1,2,3,4,4,5] instead of ranking = [1,2,3,4,4,6]
sorted_scores = [
('Apolo Ohno', 0),
('Shanie Davis', -1),
('Bodie Miller', -2),
('Lindsay Vohn', -3),
('Shawn White', -3),
('Bryan Veloso',-4)
]
res = {}
prev = None
n = 0
for k,v in sorted_scores:
if v!=prev:
n +=1
place,prev = n,v
res[k] = place
print (res)
{'Apolo Ohno': 1, 'Shanie Davis': 2, 'Bodie Miller': 3, 'Lindsay Vohn': 4, 'Shawn White': 4, 'Bryan Veloso': 5}
>>> sorted_scores = [
... ('Apolo Ohno', 0),
... ('Shanie Davis', -1),
... ('Bodie Miller', -2),
... ('Lindsay Vohn', -3),
... ('Shawn White', -3),
... ('Bryan Veloso',-4)
... ]
>>>
>>> from itertools import groupby
>>> from operator import itemgetter
>>>
>>> place=1
>>> res={}
>>> for _,items in groupby(sorted_scores,key=itemgetter(1)):
... for i,item in enumerate(items):
... res[item[0]]= place
... place+=i+1
...
>>> print res
{'Apolo Ohno': 1, 'Bryan Veloso': 6, 'Shanie Davis': 2, 'Lindsay Vohn': 4, 'Bodie Miller': 3, 'Shawn White': 4}
Remember that dicts are unordered, so to iterate in order of place, you need to do this
>>> print sorted(res.items(),key=itemgetter(1))
[('Apolo Ohno', 1), ('Shanie Davis', 2), ('Bodie Miller', 3), ('Lindsay Vohn', 4), ('Shawn White', 4), ('Bryan Veloso', 6)]
Here is a simple way to do it
last = None
position = 0
delta = 1
for key, value in sorted_list:
if value is not None:
if value != last:
position += delta
delta = 1
else:
delta += 1
# i believe this is supposed to be [key] not [value] in OP's code
positions[key] = position
last = value
I'm having to make a bunch of assumptions about what it is you want to do, but here's an attempt:
scores = {
'lorem': 100,
'ipsum': 200,
'dolor': 300,
'sit': 300,
'amet': 300,
'quia': 400,
'consectetur': 500,
'adipiscing': 500,
'elit': 600,
}
groups = {}
for (member, score) in scores.items():
if score not in groups:
groups[score] = [member]
else:
groups[score].append(member)
positions = {}
for (rank, (score, members)) in enumerate(groups.items()):
for member in members:
positions[member] = rank
Expanded for detail, to show the working:
>>> import pprint
>>> scores = {
... 'lorem': 100,
... 'ipsum': 200,
... 'dolor': 300,
... 'sit': 300,
... 'amet': 300,
... 'quia': 400,
... 'consectetur': 500,
... 'adipiscing': 500,
... 'elit': 600,
... }
>>> groups = {}
>>> for (member, score) in scores.items():
... if score not in groups:
... groups[score] = [member]
... else:
... groups[score].append(member)
...
>>> pprint.pprint(groups)
{100: ['lorem'],
200: ['ipsum'],
300: ['sit', 'dolor', 'amet'],
400: ['quia'],
500: ['consectetur', 'adipiscing'],
600: ['elit']}
>>> positions = {}
>>> for (rank, (score, members)) in enumerate(groups.items()):
... for member in members:
... positions[member] = rank
...
>>> pprint.pprint(positions)
{'adipiscing': 4,
'amet': 2,
'consectetur': 4,
'dolor': 2,
'elit': 5,
'ipsum': 1,
'lorem': 0,
'quia': 3,
'sit': 2}
>>> pprint.pprint(sorted(positions.items(), key=lambda i: i[1]))
[('lorem', 0),
('ipsum', 1),
('sit', 2),
('dolor', 2),
('amet', 2),
('quia', 3),
('consectetur', 4),
('adipiscing', 4),
('elit', 5)]
精彩评论