How to fill the span between the start and the end of a unique entry with the same value in a numpy array?
I have a 1D numpy array. Let's look at the following example:
a = [255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3]
In the above array, every value other than 255 is considered a unique entry. We want to fill the values lying between two occurrences of the same unique entry with that entry.
The result will look like
[255,1,1,1,1,255,255,255,2,2,2,2,2,2,2,3,3,3,3,3]
This could very easily be done, but I am looking for the Pythonic way of doing it.
Thanks a lot
I have no idea what being Pythonic means here, but here are my two cents:
import numpy as np
a = np.array([255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3])
# find the locations of the unique numbers (everything that is not 255)
b = np.where(a != 255)[0]
# find out what the unique numbers are
u = a[b]
# Start of the current candidate "sandwich": its value and its index.
# Initialised explicitly instead of relying on a NameError on first use,
# which would also hide genuine typos and break if the names already exist.
vlast = ilast = None
for i, v in zip(b, u):
    if vlast is not None and v == vlast:  # found a sandwich
        if i != ilast + 1:  # make sure it has something in between
            a[ilast + 1:i] = v
    else:  # make current unique value the beginning of the next sandwich
        vlast, ilast = v, i
print(a)
it gives the right answer:
[255 1 1 1 1 255 255 255 2 2 2 2 2 2 2 3 3 3 3 3]
I used the groupby function from the itertools module.
I also used the window function from here.
from __future__ import print_function
from itertools import tee, izip, groupby
# Sample input from the question: 255 is the filler value, everything else is data.
a = [255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3]
def groupby2(iterable):
    """Yield ``(key, run)`` for each run of consecutive equal items.

    Same grouping as ``itertools.groupby``, but every run is converted to a
    list right away, so it remains usable after the underlying iterator has
    moved on to the next group.
    """
    for key, run in groupby(iterable):
        yield (key, list(run))
def window(iterable, n):
    """Return an iterator over overlapping length-``n`` tuples of ``iterable``.

    ``window([1, 2, 3, 4], 2)`` produces ``(1, 2), (2, 3), (3, 4)``.
    Nothing is produced when ``iterable`` has fewer than ``n`` items.
    """
    # Resolve the lazy zip locally so this helper does not depend on the
    # Python-2-only ``izip`` name; on Python 3 the built-in zip is lazy.
    try:
        from itertools import izip as lazy_zip
    except ImportError:
        lazy_zip = zip
    els = tee(iterable, n)
    # Advance the i-th copy by i items so the copies are staggered.
    for i, el in enumerate(els):
        for _ in range(i):
            next(el, None)
    return lazy_zip(*els)
def compress(iterable):
    """Yield the items of ``iterable`` with sandwiched runs of 255 filled in.

    The input is split into runs of equal values and scanned with a sliding
    window of three runs (left, middle, right).  When the middle run is 255
    and left and right hold the same value, the middle run is emitted as
    that value instead.  All other items are passed through unchanged.

    The ``print`` calls are debugging output: they show each window and
    which branch (A-D) produced each value.
    """
    from itertools import chain, islice  # local: not among the file-level imports

    groups = groupby2(iterable)
    head = list(islice(groups, 3))
    if len(head) < 3:
        # Fewer than three runs: no window exists, so nothing can be
        # sandwiched.  Pass the input through instead of failing on the
        # never-assigned ``flag``/``ge`` after the loop.
        for _, run in head:
            for value in run:
                yield value
        return

    it = window(chain(head, groups), 3)
    # Creates the iterator which yields elements in the following manner:
    # (255, [255]), (1, [1]), (255, [255, 255])
    for ge in it:
        flag = False  # Reset the flag
        print('\nWindow: {}'.format(ge))
        for value in ge[0][1]:  # Yield all the values of the first element of the window
            print('A: {}'.format(value))
            yield value
        if ge[1][0] == 255 and ge[0][0] == ge[2][0]:  # The central element has to be replaced
            flag = True  # Flag for correct last window processing
            for _ in ge[1][1]:  # Replacing the central element of the window
                print('B: {}'.format(ge[0][0]))
                yield ge[0][0]
            # Skip one window (the for-loop advances once more, giving two
            # net advances), because its first run was just emitted above.
            skipped = next(it, None)
            if skipped is not None:
                # If the skipped window turns out to be the last one, its
                # middle and right runs are still pending; remember it so the
                # tail handling below emits them instead of dropping the
                # final run.  If the loop continues, both names are reset.
                ge, flag = skipped, False
    # Processing the remaining elements of the last window.
    if not flag:  # The central element of the last window hasn't been emitted yet.
        for value in ge[1][1]:
            print('C: {}'.format(value))
            yield value
    for value in ge[2][1]:  # The last element of the window.
        print('D: {}'.format(value))
        yield value
# Run the example and show the result next to the expected answer.
print('\nInput: {}'.format(a))
output = [item for item in compress(a)]
print('Proram output: {}'.format(output))
print('Goal output : {}'.format([255,1,1,1,1,255,255,255,2,2,2,2,2,2,2,3,3,3,3,3]))
The code includes debugging messages. I'll leave them here since they make it easier to understand how it works. Just delete them if you don't need them.
The output is:
Input: [255, 1, 255, 255, 1, 255, 255, 255, 2, 2, 255, 255, 255, 2, 2, 3, 255, 255, 255, 3]
Window: ((255, [255]), (1, [1]), (255, [255, 255]))
A: 255
Window: ((1, [1]), (255, [255, 255]), (1, [1]))
A: 1
B: 1
B: 1
Window: ((1, [1]), (255, [255, 255, 255]), (2, [2, 2]))
A: 1
Window: ((255, [255, 255, 255]), (2, [2, 2]), (255, [255, 255, 255]))
A: 255
A: 255
A: 255
Window: ((2, [2, 2]), (255, [255, 255, 255]), (2, [2, 2]))
A: 2
A: 2
B: 2
B: 2
B: 2
Window: ((2, [2, 2]), (3, [3]), (255, [255, 255, 255]))
A: 2
A: 2
Window: ((3, [3]), (255, [255, 255, 255]), (3, [3]))
A: 3
B: 3
B: 3
B: 3
D: 3
Proram output: [255, 1, 1, 1, 1, 255, 255, 255, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]
Goal output : [255, 1, 1, 1, 1, 255, 255, 255, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]
Update: here is a refactored version:
from __future__ import print_function
from itertools import tee, izip, groupby
def groupby2(iterable):
    """Yield ``(key, run_length)`` for each run of consecutive equal items."""
    for key, run in groupby(iterable):
        # Count the run without building a throwaway tuple of its items.
        yield (key, sum(1 for _ in run))
def window(iterable, n):
    """Return an iterator over overlapping length-``n`` tuples of ``iterable``.

    ``window([1, 2, 3, 4], 2)`` produces ``(1, 2), (2, 3), (3, 4)``.
    Nothing is produced when ``iterable`` has fewer than ``n`` items.
    """
    # Resolve the lazy zip locally so this helper does not depend on the
    # Python-2-only ``izip`` name; on Python 3 the built-in zip is lazy.
    try:
        from itertools import izip as lazy_zip
    except ImportError:
        lazy_zip = zip
    els = tee(iterable, n)
    # Advance the i-th copy by i items so the copies are staggered.
    for i, el in enumerate(els):
        for _ in range(i):
            next(el, None)
    return lazy_zip(*els)
def subs(iterable):
    """Yield lists of items that together reproduce ``iterable`` with
    sandwiched runs of 255 filled in.

    The input is reduced to ``(value, run_length)`` pairs and scanned with a
    sliding window of three runs (left, middle, right).  A middle run of 255
    whose neighbours hold the same value is emitted as that value; every
    other run is emitted unchanged.  Each yielded list is one run.
    """
    from itertools import chain, islice  # local: not among the file-level imports

    groups = groupby2(iterable)
    head = list(islice(groups, 3))
    if len(head) < 3:
        # Fewer than three runs: no window exists, so nothing can be
        # sandwiched.  Emit the runs as they are instead of failing on the
        # never-assigned loop variables after the loop.
        for value, count in head:
            yield [value] * count
        return

    it = window(chain(head, groups), 3)
    for left, middle, right in it:
        yield [left[0]] * left[1]
        replaced = middle[0] == 255 and left[0] == right[0]
        if replaced:
            yield [left[0]] * middle[1]
            # Skip the window that starts with the run just emitted.
            skipped = next(it, None)
            if skipped is not None:
                # If the skipped window is the last one, its middle and right
                # runs are still pending; keep them for the tail handling so
                # the final run is not dropped.  If the loop continues, these
                # names are simply reassigned.
                _, middle, right = skipped
                replaced = False
    # Emit whatever the last window has not produced yet.
    if not replaced:
        yield [middle[0]] * middle[1]
    yield [right[0]] * right[1]
def chained(iterable):
    """Flatten the lists produced by ``subs(iterable)`` into single items."""
    for chunk in subs(iterable):
        for item in chunk:
            yield item
# Sample input from the question, followed by the result and the expected answer.
a = [255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3]
print('\nInput: {}'.format(a))
output = [item for item in chained(a)]
print('Proram output: {}'.format(output))
print('Goal output : {}'.format([255,1,1,1,1,255,255,255,2,2,2,2,2,2,2,3,3,3,3,3]))
A shorter numpy-based solution:
import numpy
a = numpy.array([255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3])
# One (value, start, stop) triple per distinct non-255 value: start is the
# index of its first occurrence, stop is one past its last occurrence
# (found by searching the reversed array).
# ``!= 255`` rather than ``< 255`` so values above the filler are not skipped.
b = [(value, numpy.argmax(a == value), len(a) - numpy.argmax(a[::-1] == value))
     for value in numpy.unique(a[a != 255])]
# Overwrite each value's whole first-to-last span with that value.
for value, start, stop in b:
    a[start:stop] = value
where b is a list of tuples, each consisting of (unique value, start index, end index + 1).
Another solution is to use a window function with 2 items and ifilterfalse on the list of enumerated values.
from __future__ import print_function
from itertools import tee, izip, ifilterfalse
def window(iterable, n):
    """Return an iterator over overlapping length-``n`` tuples of ``iterable``.

    ``window([1, 2, 3, 4], 2)`` produces ``(1, 2), (2, 3), (3, 4)``.
    Nothing is produced when ``iterable`` has fewer than ``n`` items.
    """
    # Resolve the lazy zip locally so this helper does not depend on the
    # Python-2-only ``izip`` name; on Python 3 the built-in zip is lazy.
    try:
        from itertools import izip as lazy_zip
    except ImportError:
        lazy_zip = zip
    els = tee(iterable, n)
    # Advance the i-th copy by i items so the copies are staggered.
    for i, el in enumerate(els):
        for _ in range(i):
            next(el, None)
    return lazy_zip(*els)
def replace(iterable, placeholder=255):
    """Yield the items of ``iterable`` with sandwiched placeholder gaps filled.

    The items are enumerated, the placeholders are filtered out, and the
    remaining ``(index, value)`` pairs are scanned two at a time.  A gap
    between two neighbouring pairs is emitted as the left value when both
    values are equal, and as ``placeholder`` otherwise.  Placeholders before
    the first and after the last real value are emitted unchanged.

    Parameters:
        iterable: the input items.
        placeholder: the filler value to be replaced (default 255).
    """
    from itertools import chain  # local: not among the file-level imports
    try:
        from itertools import ifilterfalse as filterfalse  # Python 2
    except ImportError:
        from itertools import filterfalse  # Python 3

    # Index of the last item read from ``iterable``; -1 until one is read.
    # Kept in a per-call list (not on the function object) so that several
    # ``replace`` generators can run side by side without clobbering it.
    last_index = [-1]

    def save_last(pairs):
        for pair in pairs:
            last_index[0] = pair[0]  # Save the last index seen
            yield pair

    kept = filterfalse(lambda pair: pair[1] == placeholder,
                       save_last(enumerate(iterable)))

    # A dummy pair at index -1 gives the first real value a left neighbour.
    # The leading placeholders then become an ordinary gap, and inputs with
    # fewer than two real values no longer leave the loop variables unset.
    emitted_upto = -1  # index of the last item already emitted
    for left, right in window(chain([(-1, None)], kept), 2):
        gap = right[0] - left[0] - 1
        same_value = left[0] >= 0 and left[1] == right[1]
        fill = left[1] if same_value else placeholder
        for _ in range(gap):
            yield fill
        yield right[1]
        emitted_upto = right[0]

    # Trailing placeholders after the last real value (or the whole input
    # when it contains no real value at all).
    for _ in range(last_index[0] - emitted_upto):
        yield placeholder
# Sample input with two trailing fillers, followed by the result and the expected answer.
a = [255,1,255,255,1,255,255,255,2,2,255,255,255,2,2,3,255,255,255,3,255,255]
print('\nInput: {}'.format(a))
output = [item for item in replace(a)]
print('Proram output: {}'.format(output))
print('Goal output : {}'.format([255,1,1,1,1,255,255,255,2,2,2,2,2,2,2,3,3,3,3,3,255,255]))
Here I explain how it works.
精彩评论