开发者

C: segfault on merge sort only on large files

The following code sorts an array of words, working on small arrays, and segfaulting on large ones (>400000 words, though I haven't found a limit). It is being called by a program that passes it an array of words (read from a file) to be sorted and tests its success:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

#include "csort.h"
#include "sort.h"

// array points to array of pointers to strings, count is number of entries in array

void sortC(char** array, unsigned int count){
  array = merge_sort(array, count);
  // testing:
  /*for (int i = 0; i < count; i++){
    printf("%s ", array[i]);
    }*/
}

char** merge_sort(char** array, int count){
  if (count <= 1) return array;
  else {
    int lcount = 0;
    int rcount = 0;
    int middle = count/2;
    lcou开发者_开发百科nt = middle;
    char* left[lcount];
    subArray(array, left, 0, middle);
    rcount = count-middle;
    char* right[rcount];
    subArray(array, right, middle, count);
    return merge(merge_sort(left, lcount), merge_sort(right, rcount), array, 0, lcount, rcount);
  }
}

void subArray(char** array, char** subarray, int start, int end){
  int ai; // index in original array
  int si; // index in subarray
  for (ai = start, si = 0; ai < end; ai++, si++){
    subarray[si] = array[ai];
  }
}

char** merge(char** left, char** right, char** output, int oi, int lcount, int rcount){
  if (lcount > 0 && rcount > 0){
    int lmin = findMinimum(left, lcount);
    int rmin = findMinimum(right, rcount);
    if (strcmp(left[lmin], right[rmin]) < 0){
      output[oi] = left[lmin];
      removeFromArray(left, lmin, lcount);
      lcount--;
    }
    else {
      output[oi] = right[rmin];
      removeFromArray(right, rmin, rcount);
      rcount--;
    }
  }
  else if (lcount == 0) {
    if (rcount == 1) {
      output[oi] = right[0];
      return output;
    } else {
      int rmin = findMinimum(right, rcount);
      output[oi] = right[rmin];
      removeFromArray(right, rmin, rcount);
      rcount--;
    }
  }
  else if (rcount == 0) {
    if (lcount == 1) {
      output[oi] = left[0];
      return output;
    } else {
      int lmin = findMinimum(left, lcount);
      output[oi] = left[lmin];
      removeFromArray(left, lmin, lcount);
      lcount--;
    }
  }
  return merge(left, right, output, ++oi, lcount, rcount);
}

int findMinimum(char** array, int count){
  char* minvalue = array[0];
  char* currentvalue = minvalue;
  int minindex = 0;
  for (int i = 1; i < count; i++){
    currentvalue = array[i];
    if (strcmp(currentvalue, minvalue) < 0){
      minvalue = currentvalue;
      minindex = i;
    }
  }
  return minindex;
}

void removeFromArray(char** array, int index, int count){
  // removes specified index from an array
  for (int i = index; i < count; i++){
    if (i+1 == count){
      array[i] = 0; // this entry will be gone when count decrements
    } else {
      array[i] = array[i+1];
    }
  }
}


If there's no bug on your code then the problem might be how you are storing the data. Do you use malloc() to allocate the array to store your data or are you declaring an array that is big enough?

For large data sets you must use malloc(), which will allocate space on the HEAP instead of the stack. The stack has a limited space. This would explain why with smaller data your program works and with bigger data sets it crashes.

Also one very important point is that you are using recursion: merge() calling merge(). Too many recursive calls could lead to a stack overflow (segfault).


Looks like a stack overflow, you are allocating automatic arrays of thousands if items in each invocation, and then recursing.

These lines, to be specific:

char* left[lcount];

and

char* right[rcount];

For the values in your comment, where count == 7157, this would be quite costly in terms of stack space.

Consider either using malloc() for these, or figure out a way to represent a sub-array without requiring new memory.

0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜