find largest submatrix algorithm

2022-12-20 22:13 问答作者：

I have an N*N matrix (N开发者_开发知识库=2 to 10000) of numbers that may range from 0 to 1000. How can I find the largest (rectangular) submatrix that consists of the same number?

Example:

     1  2  3  4  5
    -- -- -- -- --
1 | 10  9  9  9 80
2 |  5  9  9  9 10
3 | 85 86 54 45 45
4 | 15 21  5  1  0
5 |  5  6 88 11 10

The output should be the area of the submatrix, followed by 1-based coordinates of its top left element. For the example, it would be (6, 2, 1) because there are six 9s situated at column 2, row 1.

This is a work in progress

I thought about this problem and I think I may have a O(w*h) algorithm.

The idea goes like this:

for any (i,j) compute the highest number of cells with the same value in the column j starting from (i,j). Store this values as heights[i][j].
create an empty vector of sub matrix (a lifo)
for all row: i
- for all column: j
  - pop all sub matrix whose height > heights[i][j]. Because the submatrix with height > heights[i][j] cannot continue on this cell
  - push a submatrix defined by (i,j,heights[i][j]) where j is the farest coordinate where we can fit a submatrix of height: heights[i][j]
  - update the current max sub matrix

The tricky part is in the inner loop. I use something similar to the max subwindow algorithm to ensure it is O(1) on average for each cell.

I will try to formulate a proof but in the meantime here is the code.

#include <algorithm>
#include <iterator>
#include <iostream>
#include <ostream>
#include <vector>

typedef std::vector<int>   row_t;
typedef std::vector<row_t> matrix_t;

std::size_t height(matrix_t const& M) { return M.size(); }
std::size_t width (matrix_t const& M) { return M.size() ? M[0].size() : 0u; }

std::ostream& operator<<(std::ostream& out, matrix_t const& M) {
  for(unsigned i=0; i<height(M); ++i) {
    std::copy(begin(M[i]), end(M[i]),
          std::ostream_iterator<int>(out, ", "));
    out << std::endl;
  }
  return out;
}

struct sub_matrix_t {
  int i, j, h, w;
  sub_matrix_t(): i(0),j(0),h(0),w(1) {}
  sub_matrix_t(int i_,int j_,int h_,int w_):i(i_),j(j_),h(h_),w(w_) {}
  bool operator<(sub_matrix_t const& rhs) const { return (w*h)<(rhs.w*rhs.h); }
};


// Pop all sub_matrix from the vector keeping only those with an height
// inferior to the passed height.
// Compute the max sub matrix while removing sub matrix with height > h
void pop_sub_m(std::vector<sub_matrix_t>& subs,
           int i, int j, int h, sub_matrix_t& max_m) {

  sub_matrix_t sub_m(i, j, h, 1);

  while(subs.size() && subs.back().h >= h) {
    sub_m = subs.back();
    subs.pop_back();
    sub_m.w = j-sub_m.j;
    max_m = std::max(max_m, sub_m);
  }

  // Now sub_m.{i,j} is updated to the farest coordinates where there is a
  // submatrix with heights >= h

  // If we don't cut the current height (because we changed value) update
  // the current max submatrix
  if(h > 0) {
    sub_m.h = h;
    sub_m.w = j-sub_m.j+1;
    max_m = std::max(max_m, sub_m);
    subs.push_back(sub_m);
  }
}

void push_sub_m(std::vector<sub_matrix_t>& subs,
        int i, int j, int h, sub_matrix_t& max_m) {
  if(subs.empty() || subs.back().h < h)
    subs.emplace_back(i, j, h, 1);
}

void solve(matrix_t const& M, sub_matrix_t& max_m) {
  // Initialize answer suitable for an empty matrix
  max_m = sub_matrix_t();
  if(height(M) == 0 || width(M) == 0) return;

  // 1) Compute the heights of columns of the same values
  matrix_t heights(height(M), row_t(width(M), 1));
  for(unsigned i=height(M)-1; i>0; --i)
    for(unsigned j=0; j<width(M); ++j)
      if(M[i-1][j]==M[i][j])
    heights[i-1][j] = heights[i][j]+1;

  // 2) Run through all columns heights to compute local sub matrices
  std::vector<sub_matrix_t> subs;
  for(int i=height(M)-1; i>=0; --i) {
    push_sub_m(subs, i, 0, heights[i][0], max_m);
    for(unsigned j=1; j<width(M); ++j) {
      bool same_val  = (M[i][j]==M[i][j-1]);
      int pop_height = (same_val) ? heights[i][j] : 0;
      int pop_j      = (same_val) ? j             : j-1;
      pop_sub_m (subs, i, pop_j, pop_height,    max_m);
      push_sub_m(subs, i, j,     heights[i][j], max_m);
    }
    pop_sub_m(subs, i, width(M)-1, 0, max_m);
  }
}

matrix_t M1{
  {10,  9,  9,  9, 80},
  { 5,  9,  9,  9, 10},
  {85, 86, 54, 45, 45},
  {15, 21,  5,  1,  0},
  { 5,  6, 88, 11, 10},
};

matrix_t M2{
  {10, 19,  9, 29, 80},
  { 5,  9,  9,  9, 10},
  { 9,  9, 54, 45, 45},
  { 9,  9,  5,  1,  0},
  { 5,  6, 88, 11, 10},
};


int main() {
  sub_matrix_t answer;

  std::cout << M1 << std::endl;
  solve(M1, answer);
  std::cout << '(' << (answer.w*answer.h)
        << ',' << (answer.j+1) << ',' << (answer.i+1) << ')'
        << std::endl;

  answer = sub_matrix_t();
  std::cout << M2 << std::endl;
  solve(M2, answer);
  std::cout << '(' << (answer.w*answer.h)
        << ',' << (answer.j+1) << ',' << (answer.i+1) << ')'
        << std::endl;
}

This is an order Rows*Columns Solution

It works by

starting at the bottom of the array, and determining how many items below each number match it in a column. This is done in O(MN) time (very trivially)
Then it goes top to bottom & left to right and sees if any given number matches the number to the left. If so, it keeps track of how the heights relate to each other to track the possible rectangle shapes

Here is a working python implementation. Apologies since I'm not sure how to get the syntax highlighting working

# this program finds the largest area in an array where all the elements have the same value
# It solves in O(rows * columns) time  using  O(rows*columns) space using dynamic programming




def max_area_subarray(array):

    rows = len(array)
    if (rows == 0):
        return [[]]
    columns = len(array[0])


    # initialize a blank new array
    # this will hold max elements of the same value in a column
    new_array = []
    for i in range(0,rows-1):
        new_array.append([0] * columns)

    # start with the bottom row, these all of 1 element of the same type 
    # below them, including themselves
    new_array.append([1] * columns)

    # go from the second to bottom row up, finding how many contiguous
    # elements of the same type there are
    for i in range(rows-2,-1,-1):
        for j in range(columns-1,-1,-1):
            if ( array[i][j] == array[i+1][j]):
                new_array[i][j] = new_array[i+1][j]+1
            else:
                new_array[i][j] = 1


    # go left to right and match up the max areas
    max_area = 0
    top = 0
    bottom = 0
    left = 0
    right = 0
    for i in range(0,rows):
        running_height =[[0,0,0]]
        for j in range(0,columns):

            matched = False
            if (j > 0):  # if this isn't the leftmost column
                if (array[i][j] == array[i][j-1]):
                    # this matches the array to the left
                    # keep track of if this is a longer column, shorter column, or same as 
                    # the one on the left
                    matched = True

                    while( new_array[i][j] < running_height[-1][0]):
                        # this is less than the one on the left, pop that running 
                        # height from the list, and add it's columns to the smaller
                        # running height below it
                        if (running_height[-1][1] > max_area):
                            max_area = running_height[-1][1]
                            top = i
                            right = j-1
                            bottom = i + running_height[-1][0]-1
                            left = j - running_height[-1][2]

                        previous_column = running_height.pop()
                        num_columns = previous_column[2]

                        if (len(running_height) > 0):
                            running_height[-1][1] += running_height[-1][0] * (num_columns)
                            running_height[-1][2] += num_columns

                        else:
                            # for instance, if we have heights 2,2,1
                            # this will trigger on the 1 after we pop the 2 out, and save the current
                            # height of 1,  the running area of 3, and running columsn of 3
                            running_height.append([new_array[i][j],new_array[i][j]*(num_columns),num_columns])


                    if (new_array[i][j] > running_height[-1][0]):
                        # longer then the one on the left
                        # append this height and area
                        running_height.append([new_array[i][j],new_array[i][j],1])
                    elif (new_array[i][j] == running_height[-1][0]):   
                        # same as the one on the left, add this area to the one on the left
                        running_height[-1][1] += new_array[i][j]
                        running_height[-1][2] += 1



            if (matched == False or j == columns -1):
                while(running_height):
                    # unwind the maximums & see if this is the new max area
                    if (running_height[-1][1] > max_area):
                        max_area = running_height[-1][1]
                        top = i
                        right = j
                        bottom = i + running_height[-1][0]-1
                        left = j - running_height[-1][2]+1

                        # this wasn't a match, so move everything one bay to the left
                        if (matched== False):
                            right = right-1
                            left = left-1


                    previous_column = running_height.pop()
                    num_columns = previous_column[2]
                    if (len(running_height) > 0):
                        running_height[-1][1] += running_height[-1][0] * num_columns
                        running_height[-1][2] += num_columns

            if (matched == False):
                # this is either the left column, or we don't match to the column to the left, so reset
                running_height = [[new_array[i][j],new_array[i][j],1]]
                if (running_height[-1][1] > max_area):
                    max_area = running_height[-1][1]
                    top = i
                    right = j
                    bottom = i + running_height[-1][0]-1
                    left = j - running_height[-1][2]+1


    max_array = []
    for i in range(top,bottom+1):
        max_array.append(array[i][left:right+1])


    return max_array



numbers = [[6,4,1,9],[5,2,2,7],[2,2,2,1],[2,3,1,5]]

for row in numbers:
    print row

print
print

max_array =  max_area_subarray(numbers)    


max_area = len(max_array) * len(max_array[0])
print 'max area is ',max_area
print 
for row in max_array:
    print row

继续阅读：algorithm submatrix

find largest submatrix algorithm

更多精彩内容

精彩评论

最新问答

央视是哪个频道？

请问买过的朋友，舒提啦旅行箱实际使用体验如何？？

检查不孕不育需要的费用？

海信ULED电视画质有什么不同的地方?？

钉子可以挂的住画框幕布吗？

问答排行榜

河神2九牛入海钓河妖是第几集河妖什么来历可活吞牛？

性激素六项检查的最佳时间是多久？多少钱？？

Easiest way to get words of one line from istream into a vector?

《梦在燃烧 (《三国演义》动画片主题曲)》MP3歌词-汤子星？

抽烟只抽炫赫门？