How to repeat elements of an array along two axes?

2023-04-10 13:37 问答作者：

I want to repeat elements of an array along axis 0 and axis 1 for M and N times respectively:

import numpy as np

a = np.arange(12).reshape(3, 4)
b = a.repeat(2, 0).repeat(2, 1)
print(开发者_高级运维b)

[[ 0  0  1  1  2  2  3  3]
 [ 0  0  1  1  2  2  3  3]
 [ 4  4  5  5  6  6  7  7]
 [ 4  4  5  5  6  6  7  7]
 [ 8  8  9  9 10 10 11 11]
 [ 8  8  9  9 10 10 11 11]]

This works, but I want to know are there better methods without create a temporary array.

You could use the Kronecker product, see numpy.kron:

>>> a = np.arange(12).reshape(3,4)
>>> print(np.kron(a, np.ones((2,2), dtype=a.dtype)))
[[ 0  0  1  1  2  2  3  3]
 [ 0  0  1  1  2  2  3  3]
 [ 4  4  5  5  6  6  7  7]
 [ 4  4  5  5  6  6  7  7]
 [ 8  8  9  9 10 10 11 11]
 [ 8  8  9  9 10 10 11 11]]

Your original method is OK too, though!

You can make use of np.broadcast_to here:

def broadcast_tile(a, h, w):
    x, y = a.shape
    m, n = x * h, y * w
    return np.broadcast_to(
        a.reshape(x, 1, y, 1), (x, h, y, w)
    ).reshape(m, n)

broadcast_tile(a, 2, 2)

array([[ 0,  0,  1,  1,  2,  2,  3,  3],
       [ 0,  0,  1,  1,  2,  2,  3,  3],
       [ 4,  4,  5,  5,  6,  6,  7,  7],
       [ 4,  4,  5,  5,  6,  6,  7,  7],
       [ 8,  8,  9,  9, 10, 10, 11, 11],
       [ 8,  8,  9,  9, 10, 10, 11, 11]])

Performance

Functions

def chris(a, h, w):
    x, y = a.shape
    m, n = x * h, y * w
    return np.broadcast_to(
        a.reshape(x, 1, y, 1), (x, h, y, w)
    ).reshape(m, n)

def alex_riley(a, b0, b1):
    r, c = a.shape
    rs, cs = a.strides
    x = np.lib.stride_tricks.as_strided(a, (r, b0, c, b1), (rs, 0, cs, 0))
    return x.reshape(r*b0, c*b1)

def paul_panzer(a, b0, b1):
    r, c = a.shape
    out = np.empty((r, b0, c, b1), a.dtype)
    out[...] = a[:, None, :, None]
    return out.reshape(r*b0, c*b1)

def wim(a, h, w):
    return np.kron(a, np.ones((h,w), dtype=a.dtype))

Setup

import numpy as np
import pandas as pd
from timeit import timeit

res = pd.DataFrame(
       index=['chris', 'alex_riley', 'paul_panzer', 'wim'],
       columns=[5, 10, 20, 50, 100, 500, 1000],
       dtype=float
)

a = np.arange(100).reshape((10,10))

for f in res.index:
    for c in res.columns:
        h = w = c
        stmt = '{}(a, h, w)'.format(f)
        setp = 'from __main__ import h, w, a, {}'.format(f)
        res.at[f, c] = timeit(stmt, setp, number=50)

Output

How to repeat elements of an array along two axes?

Since the result cannot be implemented as a view, as_strided offers no benefits over simple preallocation and broadcasting. Because of its overhead as_strided seems in fact a bit slower (I did no proper benchmarking, though).

The as_strided code is taken from @AlexRiley's post.

from numpy.lib.stride_tricks import as_strided
import numpy as np

def tile_array(a, b0, b1):
    r, c = a.shape                                    # number of rows/columns
    rs, cs = a.strides                                # row/column strides 
    x = as_strided(a, (r, b0, c, b1), (rs, 0, cs, 0)) # view a as larger 4D array
    return x.reshape(r*b0, c*b1)                      # create new 2D array

def tile_array_pp(a, b0, b1):
    r, c = a.shape
    out = np.empty((r, b0, c, b1), a.dtype)
    out[...] = a[:, None, :, None]
    return out.reshape(r*b0, c*b1)

a = np.arange(9).reshape(3, 3)

kwds = {'globals': {'f_ar': tile_array, 'f_pp': tile_array_pp, 'a': a},
        'number': 1000}

from timeit import timeit

print('as_strided', timeit('f_ar(a, 100, 100)', **kwds))
print('broadcast ', timeit('f_pp(a, 100, 100)', **kwds))

Sample run:

as_strided 0.048387714981799945
broadcast  0.04324757700669579

Another solution is to use as_strided. kron is much slower then using repeat twice. I have found that as_strided is much faster than a double repeat in many cases (small arrays [<250x250] with only a doubling in each dimension as_strided was slower). The as_strided trick is as follows:

a = arange(1000000).reshape((1000, 1000)) # dummy data

from numpy.lib.stride_tricks import as_strided
N, M = 4,3 # number of time to replicate each point in each dimension
H, W = a.shape
b = as_strided(a, (H, N, W, M), (a.strides[0], 0, a.strides[1], 0)).reshape((H*N, W*M))

This works by using 0-length strides which causes numpy to read the same value multiple times (until it gets to the next dimension). The final reshape does copy the data, but only once unlike using a double repeat which will copy the data twice.

Errata: I'm only taking 2x upsampling into account.

TL;DR It turns out that after the OpenCV version,

np.repeat(np.repeat(a, 2, axis=1), 2, axis=0)

is the fastest. So the answer is - there's no faster ways in numpy today, but you can get a slight improvement by changing the order of axes.

And if you don't mind OpenCV -

cv.resize(a, None, fx=2, fy=2, interpolation=cv.INTER_NEAREST)

Here is the test.

import timeit
import numpy as np
import cv2 as cv
test = np.zeros((16, 16, 3), dtype=np.float32)

def measure(f):
    t = timeit.timeit("f(test)", number=1000, globals={"test": test, "f": f})
    print("%s - %f"%(f.__name__, t))
    return f, t

def fastest(c):
    print(c.__name__)
    winner, t = min((measure(getattr(c, ve)) for ve in dir(c) if ve.startswith("alg_")), key=lambda x: x[1])
    print("%s winner: %s - %f"%(c.__name__, winner.__name__, t))
    return winner

@fastest
class nn:
    def alg_01(a):
        return np.repeat(np.repeat(a, 2, axis=0), 2, axis=1)
    def alg_02(a):
        return np.repeat(np.repeat(a, 2, axis=1), 2, axis=0)
    def alg_03(a):
        b = a[:, None, :, None]
        b = np.concatenate((b, b), axis=1)
        b = np.concatenate((b, b), axis=3)
        return b.reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
    def alg_04(a):
        b = a[:, None, :, None]
        b = np.concatenate((b, b), axis=3)
        b = np.concatenate((b, b), axis=1)
        return b.reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
    def alg_05(a):
        return (a[:, None, :, None]*np.ones((1, 2, 1, 2)+((1,)*len(a.shape[2:])), dtype=np.float32)).reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
    def alg_06(a):
        return cv.resize(a, None, fx=2, fy=2, interpolation=cv.INTER_NEAREST)
    def alg_07(a):
        return a[:, None, :, None][:, (0, 0)][:, :, :, (0, 0)].reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
    def alg_08(a):
        return a[:, None, :, None][:, :, :, (0, 0)][:, (0, 0)].reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
    def alg_09(a):
        return np.kron(a, np.ones((2, 2), dtype=np.float32))
    def alg_10(a):
        return np.broadcast_to(a[:, None, :, None], (a.shape[0], 2, a.shape[1], 2)+a.shape[2:]).reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])
    def alg_11(a):
        ret = np.empty((a.shape[0], 2, a.shape[1], 2, *a.shape[2:]), dtype=np.float32)
        ret[...] = a[:, None, :, None]
        ret.resize((a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:]), refcheck=False)
        return ret

The result is:

nn
alg_01 - 0.040967
alg_02 - 0.033744
alg_03 - 0.057969
alg_04 - 0.048739
alg_05 - 0.076595
alg_06 - 0.078638
alg_07 - 0.084692
alg_08 - 0.084539
alg_09 - 0.344339
alg_10 - 0.078707
alg_11 - 0.049424
nn winner: alg_02 - 0.033744

继续阅读：numpy python

How to repeat elements of an array along two axes?

更多精彩内容

精彩评论

最新问答

央视是哪个频道？

请问买过的朋友，舒提啦旅行箱实际使用体验如何？？

检查不孕不育需要的费用？

海信ULED电视画质有什么不同的地方?？

钉子可以挂的住画框幕布吗？

问答排行榜

河神2九牛入海钓河妖是第几集河妖什么来历可活吞牛？

性激素六项检查的最佳时间是多久？多少钱？？

Easiest way to get words of one line from istream into a vector?

《梦在燃烧 (《三国演义》动画片主题曲)》MP3歌词-汤子星？

抽烟只抽炫赫门？

更多精彩内容

精彩评论

最新问答

央视是哪个频道？

请问买过的朋友，舒提啦旅行箱实际使用体验如何？？

检查不孕不育需要的费用？

海信ULED电视画质有什么不同的地方?？

钉子可以挂的住画框幕布吗？

问答排行榜

河神2九牛入海钓河妖是第几集 河妖什么来历可活吞牛？

性激素六项检查的最佳时间是多久？多少钱？？

Easiest way to get words of one line from istream into a vector?

《梦在燃烧 (《三国演义》动画片主题曲)》MP3歌词-汤子星？

抽烟只抽炫赫门？

河神2九牛入海钓河妖是第几集河妖什么来历可活吞牛？