Python index more than once
I know that .index()
will return where a substring is located in python.
However, what I want is to find where a substring is located for the nth time, which would work like this:
>> s = 'abcdefacbdea'
>> s.index('a')
0
>> s.nindex('a', 1)
6
>> s.nindex('a', 2)
11
Is there a way to do this in python?
How about...
def nindex(mystr, substr, n=0, index=0):
    """Return the index of the n-th (0-based) occurrence of substr in mystr.

    Occurrences may overlap: each search resumes one character after the
    start of the previous hit.

    Args:
        mystr: string to search in.
        substr: string to search for.
        n: 0-based occurrence number (0 means the first occurrence).
        index: position in mystr at which the search starts.

    Raises:
        ValueError: if n is negative, or (from str.index) if mystr holds
            fewer than n + 1 occurrences at or after index.
    """
    if n < 0:
        # Without this guard the loop is skipped and index - 1 is returned,
        # which is not the position of any match.
        raise ValueError("n must be >= 0, got %r" % (n,))
    for _ in range(n + 1):
        # +1 so the next search starts just past the hit we just found.
        index = mystr.index(substr, index) + 1
    # Undo the final +1 to report the start of the last hit.
    return index - 1
Note: like str.index(), nindex() raises ValueError when substr is not found.
def nindex(needle, haystack, index=1):
    """Return the position of the index-th (1-based) occurrence of needle.

    The haystack is split on needle, so occurrences are counted without
    overlap, left to right.

    Args:
        needle: non-empty string to look for (str.split rejects '').
        haystack: string to search in.
        index: 1-based occurrence number.

    Raises:
        ValueError: if index < 1, if haystack contains fewer than index
            occurrences, or (from str.split) if needle is empty.
    """
    if index < 1:
        raise ValueError("index must be >= 1, got %r" % (index,))
    parts = haystack.split(needle)
    # N occurrences split the haystack into N + 1 parts.
    if index >= len(parts):
        raise ValueError("substring not found")
    # The wanted occurrence starts after the first `index` parts and the
    # `index - 1` needles that separate them.
    preceding = sum(len(part) for part in parts[:index])
    return preceding + (index - 1) * len(needle)
I'm interested to see other solutions, I don't feel that this is particularly pythonic.
I would probably use
# Collect the index of every 'a' in s, then pick the n-th one (0-based).
# The filter must test each character (value), not the whole string s.
[index for index, value in enumerate(s) if value == 'a'][n]
or
from itertools import islice
# Lazily skip the first n hits and take the next one (n is 0-based).
# The filter must test each character (value), not the whole string s.
next(islice((index for index, value in enumerate(s) if value == 'a'), n, None))
or avoid dealing in indices at all.
Here's a memoized version that avoids wasted work as much as possible while maintaining something close [1] to your specs (rather than doing something saner such as looping through all hits;-)...:
[1]: just close -- can't have a new .nindex
method in strings as you require, of course!-)
def nindex(haystack, needle, nrep=1, _memo={}):
    """Return the index of the nrep-th (1-based) occurrence of needle, or -1.

    Hits already located for a (needle, haystack) pair are remembered, so a
    later call only searches beyond the last known hit. Occurrences may
    overlap: each search resumes one character past the previous hit.

    Args:
        haystack: string to search in.
        needle: string to search for.
        nrep: 1-based occurrence number.
        _memo: cache shared across calls through the mutable default
            (intentional). It is never pruned, so it grows with every
            distinct (needle, haystack) pair.

    Returns:
        The index of the requested occurrence, or -1 if there are fewer
        than nrep occurrences.

    Raises:
        ValueError: if nrep < 1.
    """
    if nrep < 1:
        raise ValueError('%r < 1' % (nrep,))
    key = needle, haystack
    # where[i] is the index of the i-th hit; where[0] == -1 is a dummy so the
    # first search starts at 0. A trailing None marks "no further hits".
    where = _memo.setdefault(key, [-1])
    while len(where) <= nrep:
        if where[-1] is None:
            return -1
        hit = haystack.find(needle, where[-1] + 1)
        if hit < 0:
            where.append(None)
            return -1
        where.append(hit)
    found = where[nrep]
    # The cached end marker can sit exactly at where[nrep] on a repeat call;
    # report it as -1 rather than leaking None to the caller.
    return -1 if found is None else found
# Demo: look up the 1st, 2nd and 3rd 'a' in the sample string.
# print(...) with a single argument behaves the same on Python 2 and 3.
s = 'abcdefacbdea'
print(nindex(s, 'a'))
print(nindex(s, 'a', 2))
print(nindex(s, 'a', 3))
This prints 0, then 6, then 11, as requested.
>>> from re import finditer, escape
>>> from itertools import count, izip
>>> def nfind(s1, s2, n=1):
... """Return the start index of the nth (1-based) non-overlapping occurrence of s2 in s1; s2 is matched literally."""
... return next(j.start() for i,j in izip(count(1), finditer(escape(s2),s1)) if i==n)
...
>>> nfind(s,'a')
0
>>> nfind(s,'a',2)
6
>>> nfind(s,'a',3)
11
Yes. Write a loop using s.index('yourstring', start)
Update after finding a big fat -1 ... didn't I write some code???
Here's my attempt at redemption, which allows non-overlapping if desired, and is tested to the extent shown:
>>> # Return the index of the n-th (1-based) occurrence of needle in haystack.
>>> # With overlapping=True each search resumes one character past the
>>> # previous hit; otherwise it resumes max(1, len(needle)) characters past
>>> # it, so consecutive matches cannot overlap. str.index's ValueError
>>> # propagates when there are fewer than n occurrences.
>>> def nindex(haystack, needle, n, overlapping=True):
... delta = 1 if overlapping else max(1, len(needle))
... start = -delta
... for _unused in xrange(n):
... start = haystack.index(needle, start+delta)
... return start
...
>>> for n in xrange(1, 11):
... print n, nindex('abcdefacbdea', 'a', n)
...
1 0
2 6
3 11
4
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "<stdin>", line 5, in nindex
ValueError: substring not found
>>> for olap in (True, False):
... for n in (1, 2):
... print str(olap)[0], n, nindex('abababab', 'abab', n, olap)
...
T 1 0
T 2 2
F 1 0
F 2 4
>>> for n in xrange(1, 8):
... print n, nindex('abcde', '', n)
...
1 0
2 1
3 2
4 3
5 4
6 5
7
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "<stdin>", line 5, in nindex
ValueError: substring not found
>>>
def nindex(str, substr, index):
    """Return the position of occurrence number index (0-based) of substr.

    Occurrences are counted without overlap: each search resumes right after
    the end of the previous match.

    Args:
        str: string to search in (the name shadows the builtin, but is kept
            so existing keyword callers still work).
        substr: string to search for.
        index: 0-based occurrence number.

    Raises:
        ValueError: if index is negative, or (from str.index) if there are
            fewer than index + 1 occurrences.
    """
    if index < 0:
        # A negative count can never reach zero by decrementing; with an
        # empty substr that was an endless loop.
        raise ValueError("index must be >= 0, got %r" % (index,))
    start = 0
    for _ in range(index):
        # Search from an offset instead of re-slicing, which copied the
        # remaining tail on every pass.
        start = str.index(substr, start) + len(substr)
    return str.index(substr, start)
import itertools
import re


def nindex(text, n=1, default=-1, substr='a'):
    """Return the start of the n-th (1-based) occurrence of substr in text.

    Args:
        text: string to search in.
        n: 1-based occurrence number; values below 1 make itertools.islice
            raise ValueError.
        default: value returned when text has fewer than n occurrences.
        substr: literal string to look for (escaped before it is handed to
            the regex engine). Defaults to 'a', the value that used to be
            hard-coded.

    Returns:
        The index of the match, or default if there is no such occurrence.
        Matches are the non-overlapping ones produced by re.finditer.
    """
    starts = (m.start() for m in re.finditer(re.escape(substr), text))
    # Skip the first n - 1 hits lazily and take the next one, if any.
    return next(itertools.islice(starts, n - 1, None), default)
# Demo: expects s (the sample string) and nindex to be defined already.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(nindex(s))
print(nindex(s, 1))
print(nindex(s, 2))
print(nindex(s, 3))
print(nindex(s, 4))
def ifind(s, word, start=0):
    """Yield the index of every occurrence of word in s, left to right.

    Occurrences may overlap: after each hit the search resumes one character
    further on.

    Args:
        s: string to search in.
        word: string to search for.
        start: position at which the first search begins.
    """
    pos = s.find(word, start)
    while pos != -1:  # str.find signals "no more hits" with -1
        yield pos
        pos = s.find(word, pos + 1)
# Demo: materialize the generator to show every hit at once.
print(list(ifind('abcdefacbdea', 'a')))  # [0, 6, 11]
print(list(ifind('eee', 'a')))  # []
How about...
# index is 0-based
def nindex(needle, haystack, index=0):
    """Return the position of occurrence number index of needle, or -1.

    The haystack is split on needle, so occurrences are counted without
    overlap, left to right.

    Args:
        needle: non-empty string to look for (str.split rejects '').
        haystack: string to search in.
        index: 0-based occurrence number.

    Returns:
        The start of the requested occurrence, or -1 when index is negative
        or haystack holds too few occurrences.
    """
    parts = haystack.split(needle)
    # N occurrences yield N + 1 parts. A negative index would slice from the
    # end of the list and produce a meaningless position, so reject it too.
    if index < 0 or index >= len(parts) - 1:
        return -1
    # Everything before the hit: index + 1 parts plus the index needles that
    # separate them.
    return sum(len(part) for part in parts[:index + 1]) + index * len(needle)
import itertools
def multis(search, text, start=0):
    """Yield the index of each occurrence of search in text, left to right.

    Occurrences may overlap (the search resumes one character after each
    hit). Nothing is yielded when start is negative.
    """
    while start > -1:
        start = text.find(search, start)  # -1 ends the loop
        if start > -1:
            yield start
            start += 1


# one based function for nth result only
def nindex(text, search, n):
    """Return the index of the n-th (1-based) occurrence of search in text.

    Raises:
        StopIteration: if text contains fewer than n occurrences.
    """
    nth_only = itertools.islice(multis(search, text), n - 1, n)
    # The builtin next() works on Python 2.6+ and 3; the .next() method
    # exists only on Python 2 iterators.
    return next(nth_only)
# Demo: report the third hit, then every hit, for the sample string.
text, search = 'abcdefacbdea', 'a'
print("Hit %i: %i" % (3, nindex(text, search, 3)))
print('All hits: %s' % list(multis(search, text)))
Without indexes:
def nthpartition(search, text, n=None):
    """Yield (before, after) pairs for successive occurrences of search.

    For each occurrence, `before` is the text between the previous
    occurrence (or the start of text) and this one, and `after` is
    everything that follows this occurrence.

    Args:
        search: separator to look for.
        text: string to partition.
        n: maximum number of pairs to yield; a falsy value (None or 0)
            means "all of them".
    """
    if not n:
        # A non-empty separator cannot occur more than len(text) times.
        n = len(text)
    for _ in range(n):
        # Keep the matched separator in its own name so the search
        # parameter is not overwritten.
        before, found, text = text.partition(search)
        if not found:
            return
        yield before, text
# Demo: print every (before, after) pair for the sample string.
text, search = 'abcdefacbdea', 'a'
print("Searching %r in %r" % (search, text))
for parts in nthpartition(search, text):
    print(parts)
"""Output:
Searching 'a' in 'abcdefacbdea'
('', 'bcdefacbdea')
('bcdef', 'cbdea')
('cbde', '')
"""
Just call 'index' repeatedly, using the result of the last call (+ 1) as start position:
def nindex(needle, haystack, n):
    """Find the nth (1-based) occurrence of needle in haystack.

    Occurrences may overlap: each search resumes one character after the
    start of the previous hit.

    Raises:
        ValueError: if n < 1, or (from str.index) if haystack contains
            fewer than n occurrences.
    """
    if n < 1:
        # Otherwise the loop is skipped and -1 comes back, which callers
        # could mistake for a valid "last character" index.
        raise ValueError("n must be >= 1, got %r" % (n,))
    pos = -1  # so that the first search starts at index 0
    for _ in range(n):
        pos = haystack.index(needle, pos + 1)
    return pos
Note: I have not tested it.
This one does the work in a regex. After testing, it is NOT noticeably faster, even if you modify it to cache (memoize) the compiled regex.
import re
def nindex(s, substr, n = 1):
    """Find the nth (1-based) occurrence of substr in s.

    Occurrences are counted without overlap, left to right.

    Args:
        s: string to search in.
        substr: literal string to look for (escaped before use in the regex).
        n: 1-based occurrence number.

    Returns:
        The index of the nth occurrence, or None if n < 1 or s contains
        fewer than n occurrences.
    """
    if n < 1:
        # Previously this case produced "{-1}" in the pattern, which the
        # regex engine happened to read as literal text. Return None directly.
        return None
    safe_substr = re.escape(substr)
    # Skip n - 1 occurrences (each followed by as little text as possible),
    # then capture the next one. search() already scans for the leftmost
    # match, so no leading or trailing ".*?" is needed.
    regex_str = "(?:%s.*?){%i}(%s)" % (safe_substr, n - 1, safe_substr)
    # DOTALL lets "." cross newlines so occurrences on later lines are found.
    match = re.compile(regex_str, re.DOTALL).search(s)
    return None if match is None else match.start(1)
# The rest of this code is just test cases...
# print(...) with a single argument behaves the same on Python 2 and 3.
for search_str in ("a", "bc"):
    print("Looking for %s" % search_str)
    for test_str in ('abcdefacbdea',
                     'abcdefacbdeaxxx',
                     'xxxabcdefacbdeaxxx'):
        for i in (0, 1, 2, 3, 4):
            print("%s %i index: %s" %
                  (test_str, i, nindex(test_str, search_str, i)))
        # NOTE(review): the original indentation was lost; the blank-line
        # separator is assumed to follow each test string's results.
        print("")
Output is:
Looking for a
abcdefacbdea 0 index: None
abcdefacbdea 1 index: 0
abcdefacbdea 2 index: 6
abcdefacbdea 3 index: 11
abcdefacbdea 4 index: None
abcdefacbdeaxxx 0 index: None
abcdefacbdeaxxx 1 index: 0
abcdefacbdeaxxx 2 index: 6
abcdefacbdeaxxx 3 index: 11
abcdefacbdeaxxx 4 index: None
xxxabcdefacbdeaxxx 0 index: None
xxxabcdefacbdeaxxx 1 index: 3
xxxabcdefacbdeaxxx 2 index: 9
xxxabcdefacbdeaxxx 3 index: 14
xxxabcdefacbdeaxxx 4 index: None
Looking for bc
abcdefacbdea 0 index: None
abcdefacbdea 1 index: 1
abcdefacbdea 2 index: None
abcdefacbdea 3 index: None
abcdefacbdea 4 index: None
abcdefacbdeaxxx 0 index: None
abcdefacbdeaxxx 1 index: 1
abcdefacbdeaxxx 2 index: None
abcdefacbdeaxxx 3 index: None
abcdefacbdeaxxx 4 index: None
xxxabcdefacbdeaxxx 0 index: None
xxxabcdefacbdeaxxx 1 index: 4
xxxabcdefacbdeaxxx 2 index: None
xxxabcdefacbdeaxxx 3 index: None
xxxabcdefacbdeaxxx 4 index: None
Here's the memoized version:
def memoized_hedgehog_nindex(s, substr, n = 1, _memoized_regexes = {}):
    """Find the nth (1-based) occurrence of substr in s, caching the regex.

    Occurrences are counted without overlap, left to right.

    Args:
        s: string to search in.
        substr: literal string to look for (escaped before use in the regex).
        n: 1-based occurrence number.
        _memoized_regexes: cache of compiled patterns keyed by
            (n, escaped substr). It is shared across calls through the
            mutable default (intentional) and never pruned.

    Returns:
        The index of the nth occurrence, or None if n < 1 or s contains
        fewer than n occurrences.
    """
    if n < 1:
        # Previously this case produced "{-1}" in the pattern, which the
        # regex engine happened to read as literal text. Return None directly.
        return None
    safe_substr = re.escape(substr)
    # memoize
    key = (n, safe_substr)
    regex = _memoized_regexes.get(key)
    if regex is None:
        # Build the pattern only on a cache miss. DOTALL lets "." cross
        # newlines so occurrences on later lines are found.
        regex_str = "(?:%s.*?){%i}(%s)" % (safe_substr, n - 1, safe_substr)
        regex = re.compile(regex_str, re.DOTALL)
        _memoized_regexes[key] = regex
    match = regex.search(s)
    return None if match is None else match.start(1)
精彩评论