Python index more than once

2023-01-09 13:53 问答作者：

I know that .index() will return where a substring is located in python. However, what I want is to find where a substring is located for the nth time, which would work like this:

>> s = 'abcdefacbdea'
>> s.index('a')
0
>> s.nindex('a', 1)
6
>> s.nindex('a', 2)
11

Is there a way to do this in python?

How about...

def nindex(mystr, substr, n=0, index=0):
    """Return the index of the n-th (0-based) occurrence of substr in mystr.

    Occurrences may overlap: each search resumes one character after the
    start of the previous hit.

    mystr  -- string to search in
    substr -- string to search for
    n      -- which occurrence to report; 0 means the first one
    index  -- position at which the search starts

    Raises ValueError when n is negative, or (as str.index() does) when
    fewer than n + 1 occurrences are found from index onwards.
    """
    if n < 0:
        # Without this guard the loop would not run at all and the function
        # would return index - 1, which is not the position of any match.
        raise ValueError('%r < 0' % (n,))
    # range() rather than xrange() keeps this runnable on Python 2 and 3.
    for _ in range(n + 1):
        index = mystr.index(substr, index) + 1
    return index - 1

Note: like str.index(), nindex() raises ValueError when substr is not found.

def nindex(needle, haystack, index=1):
    """Return the position of the index-th (1-based) needle in haystack.

    Occurrences are counted the way str.split() separates them: left to
    right and without overlapping.

    Raises ValueError when index is smaller than 1, when needle is empty
    (str.split() rejects an empty separator), or when haystack contains
    fewer than index occurrences of needle.
    """
    if index < 1:
        raise ValueError('%r < 1' % (index,))
    parts = haystack.split(needle)
    # k occurrences cut the haystack into k + 1 parts, so the index-th
    # occurrence exists only if there are more than `index` parts.
    if index >= len(parts):
        raise ValueError('substring not found')
    # The wanted occurrence starts right after the first `index` parts and
    # the `index - 1` needles that sit between them.
    leading = sum(len(part) for part in parts[:index])
    return leading + (index - 1) * len(needle)

I'm interested to see other solutions, I don't feel that this is particularly pythonic.

I would probably use

# Collect the index of every character of s equal to 'a', then pick hit
# number n (0-based); indexing raises IndexError when there are too few hits.
# The filter must test each character (value), not the whole string s.
[index for index, value in enumerate(s) if value == 'a'][n]

from itertools import islice
# Lazy variant: skip the first n matching indices and take the next one
# (n is 0-based); next() raises StopIteration when there are too few hits.
# The filter must test each character (value), not the whole string s.
next(islice((index for index, value in enumerate(s) if value == 'a'), n, None))

or avoid dealing in indices at all.

Here's a memoized version that avoids wasted work as much as possible while maintaining something close [1] to your specs (rather than doing something saner such as looping through all hits;-)...:

[1]: just close -- can't have a new .nindex method in strings as you require, of course!-)

def nindex(haystack, needle, nrep=1, _memo={}):
    """Return the index of the nrep-th (1-based) needle in haystack, or -1.

    Occurrences may overlap: each search resumes one character after the
    start of the previous hit.

    The mutable default _memo is deliberate: it is a cache shared by all
    calls, mapping (needle, haystack) to the list of hit positions found so
    far.  Slot 0 of that list holds -1 so that slot k is the k-th hit, and
    a trailing None marks that the haystack has been searched to the end.

    Raises ValueError when nrep is smaller than 1.
    """
    if nrep < 1:
        raise ValueError('%r < 1' % (nrep,))
    key = needle, haystack
    where = _memo.setdefault(key, [-1])
    # Extend the cached list only as far as this request needs.
    while len(where) <= nrep:
        if where[-1] is None:
            return -1
        w = haystack.find(needle, where[-1] + 1)
        if w < 0:
            where.append(None)
            return -1
        where.append(w)
    hit = where[nrep]
    # The slot may hold the cached None end marker (a repeated request for
    # the first missing occurrence); report it as -1 like the first time.
    return -1 if hit is None else hit

# Demo: show the positions of the 1st, 2nd and 3rd 'a' in s.
s = 'abcdefacbdea'
# print(...) with a single argument behaves the same on Python 2 and 3.
print(nindex(s, 'a'))
print(nindex(s, 'a', 2))
print(nindex(s, 'a', 3))

This prints 0, then 6, then 11, as requested.

>>> from re import finditer, escape
>>> from itertools import count, izip

>>> def nfind(s1, s2, n=1):
...    """return the index of the nth nonoverlapping occurrence of s2 in s1"""
...    return next(j.start() for i,j in izip(count(1), finditer(escape(s2),s1)) if i==n)
...
>>> nfind(s,'a')
0
>>> nfind(s,'a',2)
6
>>> nfind(s,'a',3)
11

Yes. Write a loop using s.index('yourstring', start)

Update after finding a big fat -1 ... didn't I write some code???

Here's my attempt at redemption, which allows non-overlapping if desired, and is tested to the extent shown:

>>> def nindex(haystack, needle, n, overlapping=True):
...    delta = 1 if overlapping else max(1, len(needle))
...    start = -delta
...    for _unused in xrange(n):
...       start = haystack.index(needle, start+delta)
...    return start
...
>>> for n in xrange(1, 11):
...    print n, nindex('abcdefacbdea', 'a', n)
...
1 0
2 6
3 11
4
Traceback (most recent call last):
  File "<stdin>", line 2, in <module>
  File "<stdin>", line 5, in nindex
ValueError: substring not found
>>> for olap in (True, False):
...    for n in (1, 2):
...       print str(olap)[0], n, nindex('abababab', 'abab', n, olap)
...
T 1 0
T 2 2
F 1 0
F 2 4
>>> for n in xrange(1, 8):
...    print n, nindex('abcde', '', n)
...
1 0
2 1
3 2
4 3
5 4
6 5
7
Traceback (most recent call last):
  File "<stdin>", line 2, in <module>
  File "<stdin>", line 5, in nindex
ValueError: substring not found
>>>

def nindex(str, substr, index):
    """Return where the index-th (0-based) occurrence of substr starts in str.

    Occurrences are counted without overlapping: after each hit the search
    resumes just past the end of that hit.

    Raises ValueError when index is negative, or (as str.index() does) when
    str holds fewer than index + 1 such occurrences.
    """
    # NOTE: the first parameter shadows the builtin `str`; the name is kept
    # because it is part of the signature callers may rely on.
    if index < 0:
        # A negative count can never reach zero by decrementing; fail fast
        # instead of scanning (or, for an empty substr, looping forever).
        raise ValueError('%r < 0' % (index,))
    start = 0
    # Pass the start offset to str.index() instead of re-slicing the string,
    # which would copy the remaining text on every step.
    for _ in range(index):
        start = str.index(substr, start) + len(substr)
    return str.index(substr, start)

import re

def nindex(text, n=1, default=-1, needle='a'):
    """Return the start of the n-th (1-based) needle in text, or default.

    text    -- string to search in
    n       -- which occurrence to report; 1 means the first one
    default -- value returned when text has fewer than n occurrences
    needle  -- literal string to look for (escaped before it is handed to
               the regex engine); defaults to 'a'

    Occurrences are the non-overlapping matches produced by re.finditer().
    Raises ValueError when n is smaller than 1, because islice() rejects a
    negative start.
    """
    # Imported here because only `re` is imported next to this function.
    import itertools

    starts = (m.start() for m in re.finditer(re.escape(needle), text))
    # Skip the first n - 1 hits and take the next one, if any.
    return next(itertools.islice(starts, n - 1, None), default)

# Demo calls; print(...) with a single argument behaves the same on
# Python 2 and 3, unlike the print statement.
print(nindex(s))
print(nindex(s, 1))
print(nindex(s, 2))
print(nindex(s, 3))
print(nindex(s, 4))

def ifind(s, word, start=0):
    """Yield, in ascending order, each index at which word occurs in s.

    Scanning begins at start and resumes one character after every hit, so
    overlapping occurrences are all reported.  Nothing is yielded when word
    does not occur.
    """
    cursor = start
    while True:
        hit = s.find(word, cursor)
        if hit == -1:
            # str.find() signals "no further occurrence" with -1.
            return
        yield hit
        cursor = hit + 1

# print(...) with a single argument behaves the same on Python 2 and 3.
print(list(ifind('abcdefacbdea', 'a')))     # [0, 6, 11]
print(list(ifind('eee', 'a')))              # []

How about...

# index is 0-based
def nindex(needle, haystack, index=0):
    """Locate the index-th (0-based) needle in haystack using str.split().

    Returns the start position of that occurrence, or -1 when haystack
    contains index or fewer occurrences of needle.
    """
    pieces = haystack.split(needle)
    occurrences = len(pieces) - 1
    if index >= occurrences:
        return -1
    # Text in front of the hit: the first index + 1 pieces plus the
    # `index` needles that separate them.
    leading = pieces[:index + 1]
    return sum(map(len, leading)) + index * len(needle)

import itertools
def multis(search, text, start=0):
    """Yield each index at which search occurs in text, in ascending order.

    Scanning begins at start and resumes one character after every hit, so
    overlapping occurrences are reported too.  A negative start yields
    nothing.
    """
    position = start
    while position > -1:
        position = text.find(search, position)
        if position < 0:
            # str.find() returns -1 once no further occurrence exists.
            break
        yield position
        position += 1

# one based function for nth result only
def nindex(text, search, n):
    """Return the index of the n-th (1-based) occurrence of search in text.

    Takes only the n-th item produced by multis(search, text).

    Raises StopIteration when multis() yields fewer than n hits, and
    ValueError when n is smaller than 1 (islice() rejects a negative start).
    """
    # The builtin next() works on Python 2.6+ and 3; the iterator method
    # .next() exists on Python 2 only.
    return next(itertools.islice(multis(search, text), n - 1, n))

# Demo: report the third hit, then every hit, of `search` in `text`.
text = 'abcdefacbdea'
search = 'a'
print("Hit %i: %i" % (
    3,
    nindex(text, search, 3),
))
print('All hits: %s' % list(multis(search, text)))

Without indexes:

def nthpartition(search, text, n=None):
    """Yield (before, after) pairs for successive occurrences of search.

    Each step partitions what is left of text at the next occurrence of
    search: `before` is the text in front of that occurrence (counted from
    the end of the previous one) and `after` is everything behind it.
    At most n pairs are produced; when n is falsy (None or 0) the limit is
    len(text).  Generation stops early once search no longer occurs.
    """
    limit = n if n else len(text)
    remainder = text
    for _ in range(limit):
        head, found, remainder = remainder.partition(search)
        if not found:
            # partition() returns an empty separator when nothing matched.
            return
        yield head, remainder

# Demo: print every (before, after) pair that nthpartition() produces.
text = 'abcdefacbdea'
search = 'a'
print("Searching %r in %r" % (search, text))

for parts in nthpartition(search, text):
    print(parts)
"""Output:
Searching 'a' in 'abcdefacbdea'
('', 'bcdefacbdea')
('bcdef', 'cbdea')
('cbde', '')
"""

Just call 'index' repeatedly, using the result of the last call (+ 1) as start position:

def nindex(needle, haystack, n):
    """Find the n-th (1-based) occurrence of needle in haystack.

    Calls str.index() repeatedly, starting each search one character after
    the previous hit, so overlapping occurrences are counted.

    Raises ValueError when n is smaller than 1, or (as str.index() does)
    when haystack contains fewer than n occurrences.
    """
    if n < 1:
        # Without this guard the loop would not run and -1 would be
        # returned, which is not the position of any match.
        raise ValueError('%r < 1' % (n,))
    pos = -1
    for _ in range(n):
        pos = haystack.index(needle, pos + 1)
    return pos

Note: I have not tested it.

This one does the work with a regex. After testing, it is NOT faster, even if you modify it to cache (memoize) the compiled regex.

import re

def nindex(s, substr, n = 1):
    """Find the nth occurrence of substr in s.

    n is 1-based and occurrences are counted without overlapping.  Returns
    the start index of that occurrence, or None when n is smaller than 1
    or s holds fewer than n occurrences.
    """
    if n < 1:
        # A repeat count of {-1} is not a quantifier; the regex engine would
        # read it as literal text and could match unrelated input.
        return None
    safe_substr = re.escape(substr)
    # Lazily skip text, consume n - 1 earlier occurrences, then capture the
    # n-th one in group 1.
    regex_str = ".*?(?:%s.*?){%i}(%s).*?" % (safe_substr, n - 1, safe_substr)
    # DOTALL lets "." cross newlines so occurrences on later lines of a
    # multi-line string are reachable.
    regex = re.compile(regex_str, re.DOTALL)
    match = regex.search(s)
    if match is None:
        return None
    return match.start(1)


# The rest of this code is just test cases...
# Print, for each needle and each sample string, the index reported for
# occurrence numbers 0 through 4.
for search_str in ("a", "bc"):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Looking for %s" % search_str)
    for test_str in ('abcdefacbdea',
                     'abcdefacbdeaxxx',
                     'xxxabcdefacbdeaxxx'):
        for i in (0, 1, 2, 3, 4):
            print("%s %i index: %s" %
                  (test_str, i, nindex(test_str, search_str, i)))
    # Blank separator line between needles; a bare `print` would only do
    # this on Python 2.
    print("")

Output is:

Looking for a
abcdefacbdea 0 index: None
abcdefacbdea 1 index: 0
abcdefacbdea 2 index: 6
abcdefacbdea 3 index: 11
abcdefacbdea 4 index: None
abcdefacbdeaxxx 0 index: None
abcdefacbdeaxxx 1 index: 0
abcdefacbdeaxxx 2 index: 6
abcdefacbdeaxxx 3 index: 11
abcdefacbdeaxxx 4 index: None
xxxabcdefacbdeaxxx 0 index: None
xxxabcdefacbdeaxxx 1 index: 3
xxxabcdefacbdeaxxx 2 index: 9
xxxabcdefacbdeaxxx 3 index: 14
xxxabcdefacbdeaxxx 4 index: None

Looking for bc
abcdefacbdea 0 index: None
abcdefacbdea 1 index: 1
abcdefacbdea 2 index: None
abcdefacbdea 3 index: None
abcdefacbdea 4 index: None
abcdefacbdeaxxx 0 index: None
abcdefacbdeaxxx 1 index: 1
abcdefacbdeaxxx 2 index: None
abcdefacbdeaxxx 3 index: None
abcdefacbdeaxxx 4 index: None
xxxabcdefacbdeaxxx 0 index: None
xxxabcdefacbdeaxxx 1 index: 4
xxxabcdefacbdeaxxx 2 index: None
xxxabcdefacbdeaxxx 3 index: None
xxxabcdefacbdeaxxx 4 index: None

Here's the memoized version:

def memoized_hedgehog_nindex(s, substr, n = 1, _memoized_regexes = {}):
    """Find the nth occurrence of substr in s, caching the compiled regex.

    n is 1-based and occurrences are counted without overlapping.  Returns
    the start index of that occurrence, or None when n is smaller than 1
    or s holds fewer than n occurrences.

    The mutable default _memoized_regexes is deliberate: it is a cache
    shared by all calls, mapping (n, escaped substr) to the compiled
    pattern.
    """
    if n < 1:
        # A repeat count of {-1} is not a quantifier; the regex engine would
        # read it as literal text and could match unrelated input.
        return None
    safe_substr = re.escape(substr)

    # memoize
    key = (n, safe_substr)
    regex = _memoized_regexes.get(key)
    if regex is None:
        # Build the pattern text only on a cache miss.
        regex_str = ".*?(?:%s.*?){%i}(%s).*?" % (safe_substr, n - 1, safe_substr)
        # DOTALL lets "." cross newlines so occurrences on later lines of a
        # multi-line string are reachable.
        regex = re.compile(regex_str, re.DOTALL)
        _memoized_regexes[key] = regex

    match = regex.search(s)
    if match is None:
        return None
    return match.start(1)

继续阅读：indexing python string substring

Python index more than once

更多精彩内容

精彩评论

最新问答

央视是哪个频道？

请问买过的朋友，舒提啦旅行箱实际使用体验如何？？

检查不孕不育需要的费用？

海信ULED电视画质有什么不同的地方?？

钉子可以挂的住画框幕布吗？

问答排行榜

河神2九牛入海钓河妖是第几集河妖什么来历可活吞牛？

性激素六项检查的最佳时间是多久？多少钱？？

Easiest way to get words of one line from istream into a vector?

《梦在燃烧 (《三国演义》动画片主题曲)》MP3歌词-汤子星？

抽烟只抽炫赫门？