Sort data in C language
I need little help on following requirement, as I know very little about C syntax.
I have data in a file like this
73 54 57 [52]
75 73 65 [23]
65 54 57 [22]
22 59 71 [12]
22 28 54 [2]
65 22 54 73 [12]
65 28 54 73 [52]
22 28 65 73 [42]
65 54 57 73 [22]
22 28 54 73 [4]
Where values in bracket denotes the occurrence of that series. I need to sort this data b开发者_运维百科ased on the occurrence of the data descending with maximum elements on the top as follows.
65 28 54 73 [52]
22 28 65 73 [42]
65 54 57 73 [22]
65 22 54 73 [12]
22 28 54 73 [4]
28 59 71 [122]
73 54 57 [52]
22 28 65 [26]
..
.
.
.
And so on...
What is a quick code for this?
I am trying this
#include<string.h>
#include <stdio.h>
int main() {
FILE *infile;
char fname[40]="resultFile1.txt";
char line[100];
int lcount=0;
if((infile = fopen(fname, "r")) == NULL) {
printf("Error Opening File.\n");
}
char *Arr[23];// need to be dynamic
while( fgets(line, sizeof(line), infile) != NULL ) {
stringArray[lcount]=line;
lcount++;
Arr[lcount]=line;
} fclose(infile);
int i;
for (i = 0; i < lcount; i++) {
printf(Arr[i]);// not able to get Arr
}
}
I would:
- Load the text into memory, as a big block of text.
- Find the start of each line, creating an array of string pointers into the block of data.
- Write a comparison function that compares two lines, by looking for the "[number]" pattern.
- Call
qsort()
on my array of line pointers, using the comparison function. - Be done. :)
It's easiest if you decompose the problem into the small parts (as said above). That is an important part of engineering in general (whether software or otherwise).
The problems are roughly:
- Read the file
- output the result
- count the number of items in a line
- extract the number at the end of the line
- Store the file
- sort the file by number of items in a line
- sort the file by number at the end of the file
After that it is simple!
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define BUFFER_SIZE 100
#define MAX_NODES 1000
int count_spaces( char * s ) {
int spaces = 0;
int i;
for( i = 0; i < strlen( s ) ; i ++ ) {
if( s[ i ] == ' ' ) {
spaces++;
}
i++;
}
return spaces;
}
/* sort by number of spaces */
int sort1( char *a, char * b ) {
int spaces_a = count_spaces( a );
int spaces_b = count_spaces( b );
if( spaces_a < spaces_b )
return -1;
if( spaces_a > spaces_b )
return 1;
return 0;
}
int get_digits( char *s ) {
char buffer[ 10 ];
memset( buffer, 0, 10 );
int state = 0;
int i = 0, j = 0;
int number = 0;
for( i = 0; i < strlen( s ) ; i ++ ) {
if( state == 0 && s[ i ] == '[' ) {
state = 1;
}
if( state == 1 && s[ i ] != ']' ) {
buffer[ j ] = s[ i ];
j++;
}
if( state == 1 && s[ i ] == ']' ) {
break;
}
}
sscanf( buffer, "%d", &number );
}
/* sort by digits in brackets at the end of the line */
int sort2( char *a, char * b ) {
int a_num = get_digits( a );
int b_num = get_digits( b );
if( a_num < b_num )
return -1;
if( a_num > b_num )
return 1;
return 0;
}
int main() {
FILE *infile;
char line[ BUFFER_SIZE ];
int i = 0;
char *nodes[ MAX_NODES ];
if( (infile = fopen( "data.txt", "r" )) == NULL ) {
printf( "Error opening file\n" );
return -1;
}
while( fgets( line, BUFFER_SIZE, infile ) != NULL ) {
if( (nodes[ i ] = malloc( strlen( line ) + 1 )) == NULL ) {
printf( "Malloc failed\n" );
return -1;
}
strcpy( nodes[ i ], line );
i++;
}
// sort by # of items in a line
qsort( nodes, i, sizeof( char *), (void *)(&sort1) );
// sort by number
qsort( nodes, i, sizeof( char *), (void *)(&sort2) );
/* output results */
i--;
do {
printf( "%s", nodes[i] );
i--;
} while( i >= 0 );
}
You may be interested in the qsort function. Basically, you will need to first parse your file so that it in an array of structs, where each struct has the sequence and the "occurence" (key) as you've called it.
Then you can define your own custom comparator function, like:
int compare_file_entries(void* data1, void* data2)
{
struct file_entry* entry1 = (struct file_entry*) data1;
struct file_entry* entry2 = (struct file_entry*) data2;
if ( entry1->occurence < entry2->occurence ){
return -1;
} else if ( entry2->occurence > entry2->occurence ){
return 1;
} else{
return 0;
}
}
Then if you have an array of file_entry
objects, say struct file_entry* entries
, and you have entrycount
such entries, then you would sort that list using:
qsort(entries,entrycount,sizeof(struct file_entry),&compare_file_entries);
Once you have sorted your in-memory representation, you can then write it back out to a file.
If you don't strictly have to do it in C (which might be the case, so this is irrelevant), you can use the standard UNIX tools:
cat test | sed -r 's/.*\[(.+)\]$/\1 \0/' | sort -n -r | cut -d ' ' -f 2-
"cat test": not really needed, just to throw the file on the pipe "sed ....": duplicate the last number in the brakets at the beginning of the line "sort -n -r": sort with the first number in reverse order "cut ...": remove the duplicated field at the beginning of the line
From C, you can call this with system()
, and it will throw the result on stdout, but if you need to read the result back in your program, you will need more than that (pipes..)
Bucket sort them by element number (:Read array length once parsed) then pick a sort that suits your fancy to sort the buckets, or better yet, make the buckets binary search trees who's key value is that of the number in brackets, and voila. Problem solved.
Here is my answer. Sorry for the sheer amount of code!
First, some struct
s
// contains the whole line as well as the series for ease of id
struct _Payload
{
char *line; // the line of text
int series; // the number in [ ] at the end of that line
int numElements; // the number of elements in the line
};
// a tree sorted by the series of the Payloads within
struct _PayloadTree
{
struct _Payload *payload;
struct _PayloadTree *left;
struct _PayloadTree *right;
};
// a tree sorted by the number of elements in the PayloadTree subtrees
struct _Tree
{
int numElements; // for sorting
struct _PayloadTree *payloadTree;
struct _Tree *left;
struct _Tree *right;
};
typedef struct _Payload Payload;
typedef struct _PayloadTree PayloadTree;
typedef struct _Tree Tree;
Next, some helper functions:
Payload *createPayload(char *line, int series, int numElements)
{
Payload * payload = (Payload *)malloc(sizeof(Payload));
payload->line = line;
payload->series = series;
payload->numElements = numElements;
return payload;
}
PayloadTree *createPayloadTree(Payload *p)
{
PayloadTree * payloadTree = (PayloadTree *)malloc(sizeof(PayloadTree));
payloadTree->payload = p;
payloadTree->left = payloadTree->right = NULL;
return payloadTree;
}
Tree *createTree(int numElements)
{
Tree * tree = (Tree *)malloc(sizeof(Tree));
tree->numElements = numElements;
tree->payloadTree = NULL;
tree->left = tree->right = NULL;
return tree;
}
Now to add things to the tree. We first find the number of elements bucket, then within that place in the proper series bucket (PayloadTree
)
void addPayloadToPayloadTree(Payload *p, PayloadTree *payloadTree)
{
// these are sorted according to the value in 'series'
if(payloadTree == NULL) return;
PayloadTree *pt = payloadTree;
while(pt != NULL)
{
// should this happen?
if(p->series == pt->payload->series) break;
else if(p->series < pt->payload->series)
{
if(pt->left == NULL) pt->left = createPayloadTree(p);
else pt = pt->left;
}
else
{
if(pt->right == NULL) pt->right = createPayloadTree(p);
else pt = pt->right;
}
}
}
// Main way to add a line to the tree
void addPayload(Payload *p, Tree **tree)
{
if(*tree == NULL) *tree = createTree(p->numElements);
Tree *t = *tree;
while(t != NULL)
{
if(t->numElements == p->numElements) break;
else if(t->numElements > p->numElements)
{
// look left
if(t->left == NULL)
{
t->left = createTree(p->numElements);
break;
}
t = t->left;
}
else
{
// look right
if(t->right == NULL)
{
t->right = createTree(p->numElements);
break;
}
t = t->right;
}
}
// now t points to the right bucket
if(t->payloadTree == NULL) t->payloadTree = createPayloadTree(p);
addPayloadToPayloadTree(p, t->payloadTree);
}
Finally the print methods. Print right to left:
void printPayloadTree(PayloadTree *pt)
{
if(pt == NULL) return;
printPayloadTree(pt->right);
printf("%s\n", pt->payload->line);
printPayloadTree(pt->left);
}
void printTree(Tree *t)
{
if(t == NULL) return;
printTree(t->right);
printPayloadTree(t->payloadTree);
printTree(t->left);
}
I left out the clear
functions and the main
implementation because that's up to you, of course. Here is the method I used to parse a line. You would call it with parseLine(theLine, &series, &numElements)
where you had previously declared series
and numElements
to be of type int
.
void parseLine(char *line, int *series, int *numElements)
{
char *tok = strtok(line, " ");
int n = 0;
while(tok != NULL)
{
if(tok[0] == '[')
{
// found the series
tok[ strlen(tok) - 2 ] = '\0';
*series = atoi(tok + 1);
}
else
{
n++;
}
tok = strtok(NULL, " ");
}
*numElements = n;
}
精彩评论