Select a range of dates in Python dictionary
I have the following dictionary:
# Sample data for the question: maps date strings in "YYYY-MM-DD" form to
# integer records. The entries are listed in no particular order and the
# dates are not contiguous.
history = {
"2008-11-17": 41,
"2010-05-28": 82,
"2008-11-14": 47,
"2008-11-13": 60,
"2008-11-12": 56,
"2008-11-11": 55,
"2008-11-10": 98,
"2008-11-19": 94,
"2008-11-18": 94,
"2004-05-27": 82,
"2004-05-26": 45,
"2004-05-25": 70,
# there's more ...
}
How do I define a generator function `get_records(dict_history, str_from_date, str_to_date)` to yield `date: record` entries?
I know how to convert `datetime` objects to any string format I want. However, my major pain points in this hurdle are:
- `dict`s aren't ordered.
- `dict` keys are strings.
- The dates are not continuous.
So far, this is what I can think of:
from datetime import datetime, timedelta
def get_records(history, start_date, end_date):
    """Yield ``(date_string, record)`` pairs for each day in a date range.

    Walks the calendar one day at a time from ``start_date`` to ``end_date``
    (both inclusive) and yields an entry for every day that is a key of
    ``history``. Results therefore come out in chronological order.

    Args:
        history: Mapping of "YYYY-MM-DD" date strings to records. Keys are
            matched against ``strftime("%Y-%m-%d")`` output, so they must be
            formatted exactly that way to be found.
        start_date: First day of the range, as a "YYYY-MM-DD" string.
        end_date: Last day of the range, as a "YYYY-MM-DD" string.

    Yields:
        ``(date_string, history[date_string])`` tuples.

    Raises:
        ValueError: If either bound does not match "%Y-%m-%d" (raised by
            ``datetime.strptime`` when the generator is first advanced).
    """
    fmt = "%Y-%m-%d"
    one_day = timedelta(days=1)
    present_date = datetime.strptime(start_date, fmt)
    last_date = datetime.strptime(end_date, fmt)
    while present_date <= last_date:
        present_string = present_date.strftime(fmt)
        # Keep the ``try`` limited to the lookup: the yield lives in ``else``
        # so a KeyError raised at the yield point is not silently swallowed.
        try:
            record = history[present_string]
        except KeyError:
            pass  # no record for this day; the dates are not contiguous
        else:
            yield (present_string, record)
        present_date += one_day
Is there a more efficient way to do that?
UPDATE (2011 Aug 2)
I found a `SortedCollection` class at ActiveState, also by Raymond Hettinger.
I'd just iterate over the dictionary and return the items that match:
def get_records(history, start_date, end_date):
    """Yield ``(date, entry)`` pairs whose date key lies in an inclusive range.

    The bounds are compared with the keys as plain strings, which orders
    "YYYY-MM-DD" dates correctly without any parsing. Entries are yielded in
    the dictionary's own iteration order, not sorted by date.

    Args:
        history: Mapping of "YYYY-MM-DD" date strings to records.
        start_date: Lower bound (inclusive), as a "YYYY-MM-DD" string.
        end_date: Upper bound (inclusive), as a "YYYY-MM-DD" string.

    Yields:
        ``(date, entry)`` tuples for every key within the bounds.
    """
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is 2-only.
    for date, entry in history.items():
        if start_date <= date <= end_date:
            yield date, entry
Note that your particular date format allows direct string comparison with `<` and `>` without converting to a `datetime` instance first.
Also note that the given function will return the matching items in no particular order.
How about:
def get_records(history, start_date, end_date, format="%Y-%m-%d"):
    """Return the ``(key, value)`` pairs whose date key lies in a range.

    Both bounds and every key are parsed with ``format`` and compared as
    ``datetime`` objects, so this also works for date formats that do not
    sort correctly as plain strings. The result follows the dictionary's
    iteration order; it is not sorted by date.

    Args:
        history: Mapping of date strings (in ``format``) to records.
        start_date: Lower bound (inclusive), as a string in ``format``.
        end_date: Upper bound (inclusive), as a string in ``format``.
        format: ``strptime`` pattern shared by the bounds and the keys.

    Returns:
        A list of ``(key, value)`` tuples.

    Raises:
        ValueError: If a bound or a key does not match ``format``.
    """
    first = datetime.strptime(start_date, format)
    last = datetime.strptime(end_date, format)
    # Parse the key (the date string), not the stored value: the original
    # passed ``history[key]`` to ``strptime``, which is the record itself.
    return [
        (key, value)
        for key, value in history.items()
        if first <= datetime.strptime(key, format) <= last
    ]
def get_records(history, str_from_date, str_to_date):
    """Return the in-range ``(date, record)`` pairs as a date-sorted list.

    Keys are compared with the bounds as plain strings, which is valid for
    "YYYY-MM-DD" dates. Only the matching entries are sorted.

    Args:
        history: Mapping of "YYYY-MM-DD" date strings to records.
        str_from_date: Lower bound (inclusive), as a "YYYY-MM-DD" string.
        str_to_date: Upper bound (inclusive), as a "YYYY-MM-DD" string.

    Returns:
        A list of ``(date, record)`` tuples in ascending date order.
    """
    # The original header lacked its trailing colon; ``items()`` replaces the
    # Python-2-only ``iteritems()`` and works on both major versions.
    return sorted(
        (k, v) for k, v in history.items() if str_from_date <= k <= str_to_date
    )
This only passes through the line of dates once, at the cost of sorting the list first.
from datetime import datetime, timedelta
def get_records(history, start_date, end_date):
    """Yield ``(date_string, record)`` pairs in a range, in date order.

    Every key is parsed to a ``datetime`` once, the entries are sorted by
    that parsed date, and the scan stops at the first entry past
    ``end_date``.

    Args:
        history: Mapping of "YYYY-MM-DD" date strings to records.
        start_date: Lower bound (inclusive), as a "YYYY-MM-DD" string.
        end_date: Upper bound (inclusive), as a "YYYY-MM-DD" string.

    Yields:
        ``(date_string, record)`` tuples in ascending date order.

    Raises:
        ValueError: If a bound or a key does not match "%Y-%m-%d" (raised
            when the generator is first advanced).
    """
    fmt = "%Y-%m-%d"
    first = datetime.strptime(start_date, fmt)
    last = datetime.strptime(end_date, fmt)
    # Parse each key a single time instead of once for the sort and again
    # for every comparison in the loop.
    parsed = [
        (datetime.strptime(key, fmt), key, value)
        for key, value in history.items()
    ]
    # Sort on the parsed date only, so records themselves are never compared.
    parsed.sort(key=lambda item: item[0])
    for when, key, value in parsed:
        if when > last:
            break  # sorted input: nothing later can be in range
        if when >= first:
            yield (key, value)
# Sample data: "YYYY-MM-DD" date strings mapped to integer records, listed in
# no particular order.
history = { "2008-11-17": 41,
"2010-05-28": 82,
"2008-11-14": 47,
"2008-11-13": 60,
"2008-11-12": 56,
"2008-11-11": 55,
"2008-11-10": 98,
"2008-11-19": 94,
"2008-11-18": 94,
"2004-05-27": 82,
"2004-05-26": 45,
"2004-05-25": 70 }
def get_records(dict_history, str_from_date, str_to_date):
    """Yield ``(date, record)`` pairs in an inclusive range, in date order.

    The items are sorted by key (plain string order, which matches date
    order for "YYYY-MM-DD" strings) and scanned once; the scan stops at the
    first key past ``str_to_date``.

    Args:
        dict_history: Mapping of "YYYY-MM-DD" date strings to records.
        str_from_date: Lower bound (inclusive), as a "YYYY-MM-DD" string.
        str_to_date: Upper bound (inclusive), as a "YYYY-MM-DD" string.

    Yields:
        ``(date, record)`` tuples in ascending date order.
    """
    for k, v in sorted(dict_history.items()):
        if k > str_to_date:
            break  # sorted input: every remaining key is out of range too
        if k >= str_from_date:
            yield (k, v)
# Demo: show the raw (unordered) items, a blank line, then the selected range.
# Call-style ``print`` with a single argument behaves the same on Python 2
# and 3; ``print("")`` emits the blank line on both.
print(list(history.items()))
print("")
print(list(get_records(history, '2005-05-21', '2008-12-25')))
The dates are strings in the format 'yyyy-mm-dd'.
Sorting lexicographically these strings produces the same result as sorting them on the basis of the dates they represent.
`sorted(dict_history.items())` is a list of tuples. Python sorts this list according to the first elements of the tuples.
Since each key in the dictionary is unique, there is no ambiguity in this sorting.
Edit 1
In answer to your performance concern:
# Sample data: "YYYY-MM-DD" date strings mapped to integer records.
# NOTE: "2008-11-11" appears twice in this literal; Python keeps the value
# from the last occurrence, so the stored record for that date is 2.
history = { "2008-11-17": 41,
"2010-05-28": 82,
"2008-11-14": 47,
"2008-11-13": 60,
"2008-11-12": 56,
"2008-11-11": 55,
# Written as ``02`` originally: a leading-zero literal is octal on Python 2
# (value 2) and a syntax error on Python 3, so spell the value plainly.
"2008-11-11": 2,
"2008-11-10": 98,
"2008-11-19": 94,
"2008-11-18": 94,
"2004-05-27": 82,
"2004-05-26": 45,
"2004-05-25": 70 }
import bisect
def get_records(dict_history, str_from_date, str_to_date):
    """Yield ``(date, record)`` pairs in an inclusive range, in date order.

    The keys are sorted once (plain string order, which matches date order
    for "YYYY-MM-DD" strings) and the slice covering the requested range is
    located by binary search.

    Args:
        dict_history: Mapping of "YYYY-MM-DD" date strings to records.
        str_from_date: Lower bound (inclusive), as a "YYYY-MM-DD" string.
        str_to_date: Upper bound (inclusive), as a "YYYY-MM-DD" string.

    Yields:
        ``(date, record)`` tuples in ascending date order.
    """
    # One sort of the keys is enough; the original sorted the keys and then
    # sorted the items a second time just to slice them.
    sorted_keys = sorted(dict_history)
    # bisect_left keeps a key equal to the lower bound; bisect_right keeps a
    # key equal to the upper bound, so both ends are inclusive.
    start = bisect.bisect_left(sorted_keys, str_from_date)
    end = bisect.bisect_right(sorted_keys, str_to_date)
    for key in sorted_keys[start:end]:
        yield (key, dict_history[key])
# Demo: show the raw (unordered) items, a blank line, then the selected range.
# Call-style ``print`` with a single argument behaves the same on Python 2
# and 3; ``print("")`` emits the blank line on both.
print(list(history.items()))
print("")
print(list(get_records(history, '2005-05-21', '2008-12-25')))
精彩评论