开发者

The intersection of two sorted arrays

Given two sorted arrays: A and B. The size 开发者_如何学编程of array A is La and the size of array B is Lb. How to find the intersection of A and B?

If La is much bigger than Lb, then will there be any difference for the intersection finding algorithm?


Since this looks like a HW...I'll give you the algorithm:

Let arr1,arr2 be the two sorted arrays of length La and Lb.
Let i be index into the array arr1.
Let j be index into the array arr2.
Initialize i and j to 0.

while(i < La and j < Lb) do

    if(arr1[i] == arr2[j]) { // found a common element.
        print arr[i] // print it.
        increment i // move on.
        increment j
    }
    else if(arr1[i] > arr2[j])
        increment j // don't change i, move j.
    else
        increment i // don't change j, move i.
end while


I've been struggling with same problem for a while now, so far I came with:

  1. Linear matching which will yield O(m+n) in worst case. You basically keep two pointers (A and B) each pointing to beginning of each array. Then advance pointer which points to smaller value, until you reach end of one of arrays, that would indicate no intersection. If at any point you have *A == *B - here comes your intersection.

  2. Binary matching. Which yields ~ O(n*log(m)) in worst case. You basically pick smaller array and perform binary search in bigger array of all elements of the smaller array. If you want to be more fancy, you can even use last position where binary search failed and use it as starting point for next binary search. This way you marginally improve worst case but for some sets it might perform miracles :)

  3. Double binary matching. It's a variation of regular binary matching. Basically you get element from the middle of smaller array and do binary search in bigger array. If you find nothing then you cut smaller array in half (and yes you can toss element you already used) and cut bigger array in half (use binary search failure point). And then repeat for each pair. Results are better than O(n*log(m)) but I am too lazy to calculate what they are.

Those are two most basic ones. Both have merits. Linear is a bit easier to implement. Binary one is arguably faster (although there are plenty of cases when linear matching will outperform binary).

If anyone knows anything better than that I would love to hear it. Matching arrays is what I do these days.

P.S. don't quote me on terms "linear matching" and "binary matching" as I made them up myself and there are probably fancy name for it already.


Use set_intersection as here. The usual implementation would work similar to the merge part of merge-sort algorithm.


void Intersect()
{
    int la, lb;
    la = 5;
    lb = 100;
    int A[5];
    int i, j, k;
    i = j = k = 0;
    for (; i < 5; ++i)
        A[i] = i + 1;
    int B[100];
    for (; j < 100; ++j)
        B[j] = j + 2;
    int newSize = la < lb ? la : lb;
    int* C = new int[newSize];
    i = j = 0;
    for (; k < lb && i < la && j < lb; ++k)
    {
        if (A[i] < B[j])
            i++;
        else if (A[i] > B[j])
            j++;
        else
        {
            C[k] = A[i];
            i++;
            j++;
        }
    }
    for (k = 0; k < newSize; ++k)
        cout << C[k] << NEWLINE;
}


This is in Java, but it does what you want. It implements variant 3 mentioned in Nazar's answer (a "double" binary search) and should be the fastest solution. I'm pretty sure this beats any kind of "galloping" approach. Galloping just wastes time getting started with small steps while we jump right in with a top-down binary search.

It's not immediately obvious which complexity class applies here. We do binary searches in the longer array, but never look at the same element twice, so we are definitely within O(m+n).

This code has been thoroughly tested with random data.

import java.util.Arrays;

// main function. may return null when result is empty
static int[] intersectSortedIntArrays(int[] a, int[] b) {
  return intersectSortedIntArrays(a, b, null);
}

// no (intermediate) waste version: reuse buffer
static int[] intersectSortedIntArrays(int[] a, int[] b, IntBuffer buf) {
  int i = 0, j = 0, la = lIntArray(a), lb = lIntArray(b);
  
  // swap if a is longer than b
  if (la > lb) {
    int[] temp = a; a = b; b = temp;
    int temp2 = la; la = lb; lb = temp2;
  }
  
  // special case zero elements
  if (la == 0) return null;
  
  // special case one element
  if (la == 1)
    return Arrays.binarySearch(b, a[0]) >= 0 ? a : null;
    
  if (buf == null) buf = new IntBuffer(); else buf.reset();
  intersectSortedIntArrays_recurse(a, b, buf, 0, la, 0, lb);
  return buf.toArray();
}

static void intersectSortedIntArrays_recurse(int[] a, int[] b, IntBuffer buf, int aFrom, int aTo, int bFrom, int bTo) {
  if (aFrom >= aTo || bFrom >= bTo) return; // nothing to do
  
  // start in the middle of a, search this element in b
  int i = (aFrom+aTo)/2;
  int x = a[i];
  int j = Arrays.binarySearch(b, bFrom, bTo, x);

  if (j >= 0) {
    // element found
    intersectSortedIntArrays_recurse(a, b, buf, aFrom, i, bFrom, j);
    buf.add(x);
    intersectSortedIntArrays_recurse(a, b, buf, i+1, aTo, j+1, bTo);
  } else {
    j = -j-1;
    intersectSortedIntArrays_recurse(a, b, buf, aFrom, i, bFrom, j);
    intersectSortedIntArrays_recurse(a, b, buf, i+1, aTo, j, bTo);
  }
}


static int lIntArray(int[] a) {
  return a == null ? 0 : a.length;
}


static class IntBuffer {
  int[] data;
  int size;
  
  IntBuffer() {}
  IntBuffer(int size) { if (size != 0) data = new int[size]; }
  
  void add(int i) {
    if (size >= lIntArray(data))
      data = resizeIntArray(data, Math.max(1, lIntArray(data)*2));
    data[size++] = i;
  }
  
  int[] toArray() {
    return size == 0 ? null : resizeIntArray(data, size);
  }
  
  void reset() { size = 0; }
}

static int[] resizeIntArray(int[] a, int n) {
  if (n == lIntArray(a)) return a;
  int[] b = new int[n];
  arraycopy(a, 0, b, 0, Math.min(lIntArray(a), n));
  return b;
}

static void arraycopy(Object src, int srcPos, Object dest, int destPos, int n) {
  if (n != 0)
    System.arraycopy(src, srcPos, dest, destPos, n);
}


Let's consider two sorted arrays: -

int[] array1 = {1,2,3,4,5,6,7,8};
int[] array2 = {2,4,8};

int i=0, j=0;    //taken two pointers

While loop will run till both pointers reach up to the respective lengths.

while(i<array1.length || j<array2.length){
    if(array1[i] > array2[j])     //if first array element is bigger then increment 2nd pointer
       j++;
    else if(array1[i] < array2[j]) // same checking for second array element
      i++;
    else {                         //if both are equal then print them and increment both pointers
        System.out.print(a1[i]+ " ");

        if(i==a1.length-1 ||j==a2.length-1)   //one additional check for ArrayOutOfBoundsException
            break;
        else{
            i++;
            j++;
        }
    }
}        

Output will be: -

2 4 8


Very Simple with the PYTHON

Example: A=[1,2,3,5,7,9,90] B=[2,4,10,90]

Here we go three lines of code

for i in A:
     if(i in B):
        print(i)

Output:2, 90


Here an answer I have tested working to match two arrays that are both sorted but might have duplicate keys and values as entries. I.e. both lists are sorted by the key 'timestamp'. Then .equals detects matching. This one finds the intersection of a and b where the intersection of duplicates consumes them. I.e. each element of a that matches an element in b uses up that a entry. Sorry for the specifics here from a particular project but maybe it is useful.

I finally did the solution below after a lot of missing around with HashSet (doesn't handle duplicates), Guava MultiSet (excellent but if you look into it, it has a LOT of overhead checking).

   /**
     * Finds the intersection of events in a that are in b. Assumes packets are
     * non-monotonic in timestamp ordering. 
     *
     *
     * @param a ArrayList<BasicEvent> of a
     * @param b likewise
     * @return ArrayList of intersection
     */
    private ArrayList<BasicEvent> countIntersect(ArrayList<BasicEvent> a, ArrayList<BasicEvent> b) {
        ArrayList<BasicEvent> intersect = new ArrayList(a.size() > b.size() ? a.size() : b.size());
        int count = 0;
        if (a.isEmpty() || b.isEmpty()) {
            return new ArrayList();
        }

        // TODO test case
//        a = new ArrayList();
//        b = new ArrayList();
//        a.add(new BasicEvent(4, (short) 0, (short) 0)); // first arg is the timestamp
//        a.add(new BasicEvent(4, (short) 0, (short) 0));
//        a.add(new BasicEvent(4, (short) 1, (short) 0));
//        a.add(new BasicEvent(4, (short) 2, (short) 0));
////        a.add(new BasicEvent(2, (short) 0, (short) 0));
////        a.add(new BasicEvent(10, (short) 0, (short) 0));
//
//        b.add(new BasicEvent(2, (short) 0, (short) 0));
//        b.add(new BasicEvent(2, (short) 0, (short) 0));
//        b.add(new BasicEvent(4, (short) 0, (short) 0));
//        b.add(new BasicEvent(4, (short) 0, (short) 0));
//        b.add(new BasicEvent(4, (short) 1, (short) 0));
//        b.add(new BasicEvent(10, (short) 0, (short) 0));
        int i = 0, j = 0;
        int na = a.size(), nb = b.size();
        while (i < na && j < nb) {
            if (a.get(i).timestamp < b.get(j).timestamp) {
                i++;
            } else if (b.get(j).timestamp < a.get(i).timestamp) {
                j++;
            } else {
                // If timestamps equal, it might be identical events or maybe not
                // and there might be several events with identical timestamps.
                // We MUST match all a with all b.
                // We don't want to increment both pointers or we can miss matches.
                // We do an inner double loop for exhaustive matching as long as the timestamps
                // are identical. 
                int i1 = i, j1 = j;
                while (i1 < na && j1 < nb && a.get(i1).timestamp == b.get(j1).timestamp) {
                    boolean match = false;
                    while (j1 < nb && i1 < na && a.get(i1).timestamp == b.get(j1).timestamp) {
                        if (a.get(i1).equals(b.get(j1))) {
                            count++;
                            intersect.add(b.get(j1)); // TODO debug
                            // we have a match, so use up the a element
                            i1++;
                            match = true;
                        }
                        j1++;
                    }
                    if (!match) {
                        i1++; // 
                    }
                    j1 = j; // reset j to start of matching timestamp region
                }
                i = i1; // when done, timestamps are different or we reached end of either or both arrays
                j = j1;
            }
        }
//        System.out.println("%%%%%%%%%%%%%%");
//        printarr(a, "a");
//        printarr(b, "b");
//        printarr(intersect, "intsct");
        return intersect;
    }

    // TODO test case
    void printarr(ArrayList<BasicEvent> a, String n) {
        final int MAX = 30;
        if (a.size() > MAX) {
            System.out.printf("--------\n%s[%d]>%d\n", n, a.size(), MAX);
            return;
        }
        System.out.printf("%s[%d] --------\n", n, a.size());
        for (int i = 0; i < a.size(); i++) {
            BasicEvent e = a.get(i);
            System.out.printf("%s[%d]=[%d %d %d %d]\n", n, i, e.timestamp, e.x, e.y, (e instanceof PolarityEvent) ? ((PolarityEvent) e).getPolaritySignum() : 0);
        }
    }


 //intersection of two arrays
#include<iostream>
using namespace std;
int main() {

int i=0,j=0,m,n;
int arr1[i],arr2[j];
cout<<"Enter the number of elements in array 1: ";
cin>> m;
cout<<"Enter the number of elements in array 2: ";
cin>>n;
for (i=0;i<m;i++){
    cin>> arr1[i];
}
for(j=0;j<n;j++){
    cin>> arr2[j];
}
for(j=0;j<n;j++){
    for(i=0;i<m;i++) {
        if (arr1[i] == arr2[j]){
        cout<< arr1[i];
        cout << ' ';
        break;
        }
    } 
 }    

 return 0;
 }
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜