How to read data from file and clean and store to a file
Good day all.
I have a file that contains data, part of which is shown below. It contains records taken every 3 seconds over a period of 30 days. I want to perform the following on the data.
2010-03-03 16:00:31; 66.89; 24.0; 14.89; 0.08;
2010-03-03 16:00:35; 66.15; 24.1; 14.85; 0.08;
2010-03-03 16:00:38; 67.10; 24.2; 14.81; 0.08;
2010-03-03 16:00:42; 66.36; 24.3; 14.78; 0.08;
2010-03-03 16:00:46; 65.83; 24.4; 14.75; 0.09;
.
.
.
2010-03-03 17:00:31; 62.78; 25.2; 13.96; 0.12;
2010-03-03 17:00:35; 63.94; 25.3; 13.92; 0.11;
2010-03-03 17:00:39; 61.94; 25.3; 13.89; 0.11;
2010-03-03 17:00:43; 60.99; 25.4; 13.88; 0.12;
2010-03-03 17:00:46; 62.67; 25.4; 13.89; 0.13;
2010-03-03 17:00:50; 62.57; 25.3; 13.91; 0.13;
2010-03-03 17:00:54; 61.51; 25.3; 13.91; 0.14;
.
.
.
2010-03-03 18:00:29; 66.04; 20.7; 13.63; 0.56;
2010-03-03 18:00:33; 66.04; 20.7; 13.63; 0.56;
2010-03-03 18:00:37; 65.52; 20.7; 13.59; 0.56;
2010-03-03 18:00:40; 64.46; 20.7; 13.56; 0.56;
2010-03-03 18:00:44; 64.88; 20.8; 13.56; 0.56;
.
.
.
1. Read all the lines in the file, starting from the first line.
2. For each hour, I want to calculate the sum of the data in the second column only, and the number of records that gave rise to this sum.
3. And then print the following information to a file:
i. Date; sum(1st hour), number of records that gave rise to this sum; sum(2nd hour), num_records; sum(3rd hour), num_records; ...; sum total(24 hours), total_records; mean
This is the example of what I want to print to a file as described above;
03\03\2010; 15093.47; 379; 16025.46; 380; 14800.58; 379; 14605.34; 380; 21754.27,379;...;82279.12,1897;43,37
04\03\2010; 6842.051; 379; 7137.491; 380; 7215.16; 380; 7159.189; 379; 6594.672; 380;...;34948.56,1898;18,41
05\03\2010; 9938.37; 379; 9670.438; 380; 8232.032; 380; 9198.899; 379; 7083.687; 380;...;44123.426,1898;23,25
I have started with this code
int file_readline(char *file_in,char *outfile,char *strline) {
FILE *fd=NULL;
FILE *fo= NULL
char *date, *tmp,*time;
double sum=0;
double mean = 0;
strline=calloc(MAX_BUFFER_SIZE,sizeof(strline));
if (strline==NULL) {
printf ("Error calloc strline.................");
exit(EXIT_FAILURE);
}
file_in = calloc((strlen(strline)+strlen(file_in)),sizeof(file_in));
if (file_in==NULL)
{
printf ("Error calloc strline.................");
exit(EXIT_FAILURE);
}
fd=fopen(file_in,"r");
int i = 0;
int j = 0;
while ((fgets (strline, BUFSIZ, fd))>0 && !feof(fd)){
date = strtok(strline, " ");
time=strtok(NULL, " ");
tmp = strtok(NULL, ";");
if (i == 3) { // get only the 3rd value
sum += strtod(tmp, NULL);
++i;
// don't know how to proceed from here
I would approach this by making each line an array and then parse through the array and do your calculations as it looks like you are doing. It looks like your data file is consistent in format, so might as well take advantage. Storing the array cells in particular variables and then casting them to floats for your math is probably the simplest approach.
enum {OUT,ADD};

/*
 * Accumulate per-hour sums and record counts for one day, or print them.
 *
 * action: ADD folds `val` into the slot for `hour` and into the day total;
 *         any value other than OUT is treated as ADD.
 *         OUT prints "sum; count; " for every hour slot in the order the
 *         hours were first seen, then "total_count; total_sum\n", and
 *         resets all state for the next day. OUT prints nothing when no
 *         sample has been added since the last reset.
 * hour:   hour of the sample (ignored for OUT). A new slot is opened each
 *         time the hour differs from that of the previous sample.
 * val:    sample value (ignored for OUT).
 *
 * State lives in function-local statics, so the function is not reentrant.
 */
void processline(int action,int hour,float val)
{
    enum { HOUR_SLOTS = 24 };

    static int lasthour = -1;            /* hour of the previous sample */
    static int z;                        /* current slot; 0 = no sample yet */
    static double vals[HOUR_SLOTS + 1];  /* [0] day total, [1..] hour sums */
    static int nums[HOUR_SLOTS + 1];     /* [0] day count, [1..] hour counts */
    int i;

    if (OUT == action) {
        /* Test the slot index, not the hour: hour 0 (midnight) is valid. */
        if (z > 0) {
            for (i = 1; i <= z; ++i)
                printf("%.2f; %d; ", vals[i], nums[i]);
            printf("%d; %.2f\n", nums[0], vals[0]);

            lasthour = -1;
            z = 0;
            memset(vals, 0, sizeof vals);
            memset(nums, 0, sizeof nums);
        }
        return;
    }

    /* The first sample always opens a slot, even when its hour is 0. */
    if (z == 0 || hour != lasthour) {
        /*
         * Never index past the arrays: once every slot is in use, further
         * hour changes are merged into the last slot so that the per-slot
         * figures still add up to the day total.
         */
        if (z < HOUR_SLOTS)
            ++z;
        lasthour = hour;
    }

    /* Accumulate in double; float loses cents on sums of this size. */
    vals[z] += val;
    nums[z]++;
    vals[0] += val;
    nums[0]++;
}
/*
 * Read "test.txt" line by line and print one summary line per day.
 *
 * Each usable line has the form "<date> <hh:mm:ss>; <value>; ...". Lines
 * that do not match (for example the "." filler lines) are skipped. When
 * the date changes, and once more at end of input, the previous date is
 * printed followed by whatever processline(OUT, ...) emits for that day.
 *
 * Returns 0 on success, 1 if the input file cannot be opened.
 */
int main(void)
{
    char line[100], lastday[20] = "";
    FILE *f = fopen("test.txt", "r");

    if (f == NULL) {
        perror("test.txt");
        return 1;
    }

    while (fgets(line, sizeof line, f)) {
        char day[20];       /* same size as lastday, so strcpy cannot overflow */
        char clock[100];    /* " hh:mm:ss", leading blank included */
        float fl;

        /* Field widths keep both tokens inside their buffers. */
        if (3 != sscanf(line, "%19[^ ]%99[^;];%f", day, clock, &fl))
            continue;

        if (strcmp(lastday, day) != 0) {
            /* Date changed: flush the day that just ended, if any. */
            if (*lastday) {
                printf("%s; ", lastday);
                processline(OUT, 0, 0);
            }
            strcpy(lastday, day);
        }

        /* strtol skips the leading blank and stops at the first ':'. */
        processline(ADD, (int)strtol(clock, NULL, 10), fl);
    }

    /* Flush the final day. */
    if (*lastday) {
        printf("%s; ", lastday);
        processline(OUT, 0, 0);
    }

    fclose(f);
    return 0;
}
should work. How many Perl-LoCs needed?!
精彩评论