开发者

How do I fill the span between the first and last occurrence of each unique entry with that entry's value in a numpy array?

I have a 1D numpy array. Let's look at the following example:

a = [255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3]

In the above array, every value except 255 is considered a unique entry (255 acts as a placeholder). We want to fill the positions that lie between occurrences of the same unique entry with that entry's value.

The result will look like:

[255,1,1,1,1,255,255,255,2,2,2,2,2,2,2,3,3,3,3,3]    

This could very easily be done, but I am looking for the Pythonic way of doing it.

Thanks a lot


I have no idea what being Pythonic means here, but here are my two cents:

import numpy as np    

a = np.array([255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3])

# Positions of the entries that are not the 255 placeholder.
b = np.where(a != 255)[0]
# The values found at those positions.  Indexing with an index array returns
# a copy, so writing into `a` below does not change `u`.
u = a[b]

# Value and position at which the current run of equal values started.
# Both stay None until the first non-placeholder entry is seen.  They are
# initialised explicitly instead of relying on a NameError, which would
# silently misbehave if `vlast`/`ilast` were left over from an earlier run
# in the same interpreter session.
vlast = ilast = None

for i, v in zip(b, u):
    if vlast is not None and v == vlast:
        # Found a sandwich: overwrite everything between the start of the
        # run and this occurrence.  When the two positions are adjacent the
        # slice is empty and the assignment is a no-op.
        a[ilast + 1:i] = v
    else:
        # A different value becomes the beginning of the next sandwich.
        vlast, ilast = v, i

print(a)

It gives the right answer:

[255   1   1   1   1 255 255 255   2   2   2   2   2   2   2   3   3   3   3   3]


I used the groupby function from the itertools module.

I also used the window function from here.

from __future__ import print_function
from  itertools import tee, izip, groupby

# Sample input for the demo below; 255 is the value that compress() treats as
# the placeholder to be filled in.
a = [255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3]

def groupby2(iterable):
    '''Yield ``(key, members)`` for every run of consecutive equal items.

    This wraps ``itertools.groupby`` and turns the lazy group iterator into
    a list, so each yielded pair stays usable after the underlying iterator
    has moved on to the next run.
    '''
    for key, members in groupby(iterable):
        yield key, list(members)


def window(iterable, n):
    '''Return an iterator over overlapping ``n``-tuples of consecutive items.

    ``n`` independent copies of the iterable are made with ``tee``; the
    k-th copy is advanced by k items and the copies are then zipped, so the
    result stops as soon as the most advanced copy runs out.
    '''
    copies = tee(iterable, n)
    for offset, copy in enumerate(copies):
        # Skip `offset` items on this copy (a no-op once it is exhausted).
        skipped = 0
        while skipped < offset:
            next(copy, None)
            skipped += 1
    return izip(*copies)

def compress(iterable):
    '''Yield the items of ``iterable`` with bridged runs of 255 filled in.

    The input is split into runs of equal values with ``groupby2`` and
    scanned with a sliding window of three runs.  Whenever the middle run
    consists of 255 and the runs on both sides carry the same value, the
    middle run is emitted as that value instead of 255.

    Debugging messages are printed while items are produced: ``Window`` for
    each window examined, ``A`` for untouched values, ``B`` for replaced
    values, and ``C``/``D`` for the runs flushed after the last window.

    Inputs with fewer than three runs (including empty input) are passed
    through unchanged, and no run is lost when the window skipped after a
    replacement happens to be the last one.
    '''
    groups = groupby2(iterable)
    # Pull the first two runs eagerly so they can still be emitted when the
    # input is too short to form even a single window of three.
    head = [g for g in (next(groups, None), next(groups, None)) if g is not None]

    def replay():
        # Hand the runs to window() in their original order.
        for group in head:
            yield group
        for group in groups:
            yield group

    it = window(replay(), 3)
    # Creates the iterator which yields the elements in the following manner:
    # (255, [255]), (1, [1]), (255, [255, 255])

    # Runs that have been read but not emitted yet, each paired with the
    # debug label to print if it ends up being flushed after the loop.
    labels = ('C', 'D') if len(head) == 2 else ('D',)
    pending = list(zip(labels, head))

    for ge in it:
        left, middle, right = ge
        print('\nWindow: {}'.format(ge))

        for value in left[1]:  # Yield all the values of the first run of the window
            print('A: {}'.format(value))
            yield value

        # Unless a replacement happens below, the central and last runs of
        # this window are what remains to be emitted.
        pending = [('C', middle), ('D', right)]

        if middle[0] == 255 and left[0] == right[0]:
            # The central run of the window has to be replaced.
            for _ in middle[1]:
                print('B: {}'.format(left[0]))
                yield left[0]

            # Skip the window that starts with the run just replaced (the
            # for-loop advances once more, giving 2 net advances).
            skipped = next(it, None)
            if skipped is None:
                # No further window: only the right-hand run is left.
                pending = [('D', right)]
            else:
                # If the loop ends here, the last two runs of the skipped
                # window have not been emitted yet.
                pending = [('C', skipped[1]), ('D', skipped[2])]

    # Flush whatever the last window (or a too-short input) left behind.
    for label, group in pending:
        for value in group[1]:
            print('{}: {}'.format(label, value))
            yield value


# Demo: run compress() on the sample input and show the result next to the
# expected answer.
goal = [255,1,1,1,1,255,255,255,2,2,2,2,2,2,2,3,3,3,3,3]
print('\nInput: {}'.format(a))
output = list(compress(a))
print('Proram output: {}'.format(output))
print('Goal output  : {}'.format(goal))

The code includes debugging messages. I'll leave them here since they make it easier to understand how it works. Just delete them if you don't need them.

The output is:

Input: [255, 1, 255, 255, 1, 255, 255, 255, 2, 2, 255, 255, 255, 2, 2, 3, 255, 255, 255, 3]

Window: ((255, [255]), (1, [1]), (255, [255, 255]))
A: 255

Window: ((1, [1]), (255, [255, 255]), (1, [1]))
A: 1
B: 1
B: 1

Window: ((1, [1]), (255, [255, 255, 255]), (2, [2, 2]))
A: 1

Window: ((255, [255, 255, 255]), (2, [2, 2]), (255, [255, 255, 255]))
A: 255
A: 255
A: 255

Window: ((2, [2, 2]), (255, [255, 255, 255]), (2, [2, 2]))
A: 2
A: 2
B: 2
B: 2
B: 2

Window: ((2, [2, 2]), (3, [3]), (255, [255, 255, 255]))
A: 2
A: 2

Window: ((3, [3]), (255, [255, 255, 255]), (3, [3]))
A: 3
B: 3
B: 3
B: 3
D: 3
Proram output: [255, 1, 1, 1, 1, 255, 255, 255, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]
Goal output  : [255, 1, 1, 1, 1, 255, 255, 255, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]

Update: Here is a refactored version:

from __future__ import print_function
from  itertools import tee, izip, groupby

def groupby2(iterable):
    """Yield ``(key, run_length)`` for every run of consecutive equal items."""
    for key, run in groupby(iterable):
        # Count the members of the run without keeping them around.
        yield key, sum(1 for _ in run)


def window(iterable, n):
    """Return an iterator over overlapping ``n``-tuples of consecutive items.

    The iterable is split into ``n`` independent branches with ``tee``;
    branch number k is moved k items ahead before all branches are zipped.
    """
    branches = tee(iterable, n)
    # Branch 0 stays where it is, so start shifting from branch 1.
    for shift, branch in enumerate(branches[1:], 1):
        for _ in range(shift):
            next(branch, None)
    return izip(*branches)


def subs(iterable):
    """Yield one list per run of ``iterable``, filling bridged runs of 255.

    The input is reduced to ``(value, count)`` runs with ``groupby2`` and
    scanned with a sliding window of three runs.  A middle run of 255 whose
    neighbours on both sides carry the same value is emitted as that value
    instead.  Every yielded item is a list ``[value] * count``.

    Inputs with fewer than three runs (including empty input) are passed
    through unchanged, and no run is lost when the window skipped after a
    replacement happens to be the last one.
    """
    groups = groupby2(iterable)
    # Pull the first two runs eagerly so they can still be emitted when the
    # input is too short to form even a single window of three.
    head = [g for g in (next(groups, None), next(groups, None)) if g is not None]

    def replay():
        # Hand the runs to window() in their original order.
        for group in head:
            yield group
        for group in groups:
            yield group

    # Runs that have been read but not emitted yet.
    pending = head

    it = window(replay(), 3)
    for left, middle, right in it:
        yield [left[0]] * left[1]
        pending = [middle, right]
        if middle[0] == 255 and left[0] == right[0]:
            yield [left[0]] * middle[1]
            # Skip the window that starts with the run just replaced; the
            # for-loop then advances once more.
            skipped = next(it, None)
            if skipped is None:
                pending = [right]
            else:
                # If the loop ends here, the last two runs of the skipped
                # window have not been emitted yet.
                pending = [skipped[1], skipped[2]]

    # Flush whatever the last window (or a too-short input) left behind.
    for value, count in pending:
        yield [value] * count


def chained(iterable):
    """Flatten the lists produced by ``subs(iterable)`` into single items."""
    chunks = subs(iterable)
    for chunk in chunks:
        for item in chunk:
            yield item


# Demo: run the refactored pipeline on the sample input and show the result
# next to the expected answer.
a = [255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3]
goal = [255,1,1,1,1,255,255,255,2,2,2,2,2,2,2,3,3,3,3,3]
print('\nInput: {}'.format(a))
output = list(chained(a))
print('Proram output: {}'.format(output))
print('Goal output  : {}'.format(goal))


A shorter numpy-based solution:

import numpy
a = numpy.array([255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3])

# For every distinct value below 255, record (value, index of its first
# occurrence, one past the index of its last occurrence).  All spans are
# collected before anything is written, so they refer to the untouched array.
b = []
for value in numpy.unique(a[a < 255]):
    first = numpy.argmax(a == value)
    stop = len(a) - numpy.argmax(a[::-1] == value)
    b.append((value, first, stop))

# Overwrite each recorded span with its value.
for value, first, stop in b:
    a[first:stop] = value

where b is a list of tuples consisting of (unique value, start index, end index + 1).


Another solution is to use a window function of size 2 together with ifilterfalse on the enumerated values.

from __future__ import print_function
from  itertools import tee, izip, ifilterfalse


def window(iterable, n):
    """Return an iterator over overlapping ``n``-tuples of consecutive items.

    ``tee`` provides ``n`` independent iterators over the input; after the
    shifting below, iterator number k is k items ahead of iterator 0, and
    zipping them yields the sliding windows.
    """
    iterators = tee(iterable, n)
    # On each pass every iterator from position `lead` onwards moves one
    # step, so iterator k ends up advanced exactly k times.
    for lead in range(1, n):
        for ahead in iterators[lead:]:
            next(ahead, None)
    return izip(*iterators)


def replace(iterable, placeholder=255):
    """Yield the items of ``iterable`` with bridged placeholder gaps filled.

    The items are enumerated, the placeholders are filtered out, and the
    remaining ``(index, value)`` pairs are walked two at a time.  A gap
    between two neighbouring pairs is emitted as the shared value when both
    pairs carry the same value, and as ``placeholder`` otherwise.  Leading
    and trailing placeholders are reproduced as they were.

    Args:
        iterable: the items to process.
        placeholder: the value that marks a fillable position.

    Yields:
        One output item per input item.

    Empty input, input made only of placeholders, and input with a single
    non-placeholder item are passed through unchanged.
    """
    # Index of the last item read from the input.  It is kept in a mutable
    # cell local to this call rather than on the function object, so
    # separate calls do not share state.  (A dict is used because the
    # izip/ifilterfalse imports suggest this code targets Python 2, which
    # has no `nonlocal`.)
    state = {'last_index': -1}

    def save_last(pairs):
        for pair in pairs:
            state['last_index'] = pair[0]
            yield pair

    it = save_last(enumerate(iterable))
    it = ifilterfalse(lambda x: x[1] == placeholder, it)

    # Take the first non-placeholder item by hand so that inputs with fewer
    # than two such items, which produce no window at all, still work.
    first = next(it, None)
    if first is None:
        # Nothing but placeholders (or nothing at all): by now the whole
        # input has been read, so last_index + 1 is its length.
        for _ in range(state['last_index'] + 1):
            yield placeholder
        return

    # Placeholders in front of the first real value.
    for _ in range(first[0]):
        yield placeholder

    def replay():
        # Put the item taken above back in front of the remaining ones.
        yield first
        for pair in it:
            yield pair

    right = first
    for left, right in window(replay(), 2):
        yield left[1]
        # The gap is empty when the two items are adjacent.
        fill = left[1] if left[1] == right[1] else placeholder
        for _ in range(right[0] - left[0] - 1):
            yield fill

    yield right[1]
    # Placeholders behind the last real value.
    for _ in range(state['last_index'] - right[0]):
        yield placeholder


# Demo: the sample input here ends with two trailing placeholders, which must
# survive unchanged.
a = [255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3,255,255]
goal = [255,1,1,1,1,255,255,255,2,2,2,2,2,2,2,3,3,3,3,3,255,255]
print('\nInput: {}'.format(a))
output = list(replace(a))
print('Proram output: {}'.format(output))
print('Goal output  : {}'.format(goal))

Here I explain how it works.

0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜