开发者

Selecting and analysing window of points in an array

Could someone please advise me on how to resolve this problem.

I have a function which performs a simple regression analysis on a set of points contained in an array. I have one array (pval) which contains all the data I want to perform the regression analysis on. This is how I want to implement it.

  1. I get an average value for the first 7 elements of the array. This is what I call a 'ref_avg' in the programme.

  2. I want to perform a regression analysis for every five elements of the array taking the first element of this array as the 'ref_avg'. That is in every step of the regression analysis I will have 6 points in the array.

    e.g For the 1st step the ref_avg as calculated below is 70.78. So the 1st step in the simple regression will contain these points

    1st = {70.78,76.26,69.17,68.68,71.49,73.08},

    The second step will contain the ref_avg as the 1st element and other elements starting from the second element in the original array

    2nd = {70.78,69.17,68.68,71.49,73.08,72.99},

    3rd = {70.78,68.68,71.49,73.08,72.99,70.36},

    4th = {70.78,71.49,73.08,72.99,70.36,57.82} and so on until the end.

  3. The regression function is also shown below.

I don't understand why the first 3 elements of the 'calcul' array have the value 0.00 on the first step of the regression, 2 elements on the 2nd step, and 1 element on the 3rd. Also, the last step of the regression is printed 3 times.

  #include <stdio.h>
  #include <stdlib.h>           
  #include <string.h>   

   /*
    * Driver from the question. It averages the first size_hour values of
    * pval into ref_avg, then repeatedly fills calcul[] from pval and hands it,
    * together with tab_time[], to linear_regression().
    *
    * The logic is intentionally left as posted, because its misbehaviour is
    * what the question is about; the NOTE comments mark where it goes wrong.
    */
   int main()
{

  float pval[]={76.26,69.17,68.68,71.49,73.08,72.99,70.36,57.82,58.98,69.71,70.43,77.53,80.77,70.30,70.5,70.79,75.58,76.88,80.20,77.69,80.80,70.5,85.27,75.25};


   int count,Nhour;
   const int MAX_HOUR = 24;
   float *calcul=NULL;
   float *tab_time =NULL;
   float ref_avg;
   int size_hour=7;
   float sum=0;
   /* NOTE: Nhour is read here before its first assignment (it is only set
      inside the loop below), so length holds an indeterminate value. */
   int length = Nhour+1;
   float m;
   float b;
   /* NOTE: sizeof(calcul) is the size of a pointer to float, not of a float. */
   calcul=(float*)calloc(MAX_HOUR,sizeof(calcul));
     if (calcul==NULL) 
    {
        printf(" error in buffer\n");
        exit(EXIT_FAILURE);
    }

   /* NOTE: unlike calcul, this allocation is not checked for NULL. */
   tab_time= calloc(MAX_HOUR,sizeof(float));

         /* Get the average of the first seven elements */
            int i;
    for (i=0;i<size_hour;i++)
    {
    sum += pval[i];
    }
    ref_avg = sum / size_hour; 

          count=0;
        /* perform the regression analysis on 5 hours increment */

         /* count is incremented at the top of the body, so inside the loop it
            takes the values 1 .. MAX_HOUR+1. */
         while(count<=MAX_HOUR)
         {
          ++count;
           Nhour=5;

           /* NOTE: pass starts at -(Nhour-1), so count+pass is negative while
              count < Nhour-1 and pval[] is indexed before its first element. */
           int pass = -(Nhour-1);
           int i=0;

           for(i=0;i<Nhour+1;i++)  
             {
             /* NOTE: once count reaches MAX_HOUR, calcul[] is no longer
                refreshed, yet the printing and the regression below still run,
                so the last window is reported again. */
             if(count<MAX_HOUR)
               {

              /* NOTE: when i == 0 the second assignment overwrites the
                 ref_avg that was just stored in calcul[0]. */
              calcul[0]=ref_avg;
              calcul[i] =pval[count+pass];
              pass++;
               }

     printf("calc=%.2f\n",calcul[i]); // For debug only 
     tab_time[i]=i+1; 

               if(i==Nhour)
            {

           /* NOTE: no declaration of linear_regression() precedes this call
              in the listing; it is defined after main(). */
           linear_regression(tab_time, calcul, length, &m, &b);
           printf("Slope= %.2f\n", m);

            }
           }
     }

    free(calcul);
    calcul=NULL;
    free(tab_time);
    tab_time=NULL;
    return 0;
  }
  /*  end of the main function */


   /* This function is used to calculate the linear 
    regression as it was called above in the main function. 
    It compiles and runs very well, was just included for the 
    compilation and execution of the main function above where I have a problem. */


    /*
     * Simple linear regression over the n points (x[i], y[i]).
     *
     * Writes the fitted slope to *beta1 and the intercept to *beta0.
     * Both are set to 0 when n <= 1 (a message is printed in that case) or
     * when the x values have zero spread. Always returns 0.
     */
    int linear_regression(const float *x,  const float *y, const int n, float *beta1, float *beta0)
    {
        float sx = 0;
        float sy = 0;
        float sxx = 0;
        float sxy = 0;
        float spread;
        int k;

        /* A single point (or none) does not determine a line. */
        if (n <= 1)
        {
            *beta1 = 0;
            *beta0 = 0;
            printf("Not enough data for regression \n");
            return 0;
        }

        /* Accumulate the four sums the closed-form solution needs. */
        k = 0;
        while (k < n)
        {
            sx += x[k];
            sy += y[k];
            sxx += (x[k] * x[k]);
            sxy += (x[k] * y[k]);
            k++;
        }

        spread = (sxx - ((sx * sx) / n));

        /* All x equal: the slope is undefined, report zeros instead. */
        if (spread == 0)
        {
            *beta1 = 0;
            *beta0 = 0;
            return 0;
        }

        *beta1 = (sxy - ((sx * sy) / n)) /  spread;
        *beta0 = (sy - ((*beta1) * sx)) / n;
        return 0;
    }


I think this code produces sane answers. The reference average quoted in the question seems to be wrong. The memory allocation is not needed. The value of MAX_HOUR was 24 but there were only 23 data values in the array. The indexing in building up the array to be regressed was bogus, referencing negative indexes in the pval array (and hence leading to erroneous results). The variable Nhour was referenced before it was initialized; the variable length was not correctly set. There wasn't good diagnostic printing.

The body of main() here is substantially rewritten; the edits to linear_regression() are much closer to minimal. The code is laid out more consistently, and white space has been used to make it easier to read. This version stops the regression when there is no longer enough data left to fill the array with 5 values; it is not clear what the intended termination condition was.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>           
#include <string.h>   

/*
 * Simple linear regression over the n points (x[i], y[i]).
 * The results are written through beta1 and beta0; main() prints them as
 * the slope and the intercept respectively. Nothing is returned.
 */
void linear_regression(const float *x, const float *y, const int n,
                       float *beta1, float *beta0);

/*
 * Sliding-window regression driver.
 *
 * Averages the first size_hour values of pval into a reference value, then
 * for each window start fits a line through six points: the reference value
 * followed by Nhour consecutive values of pval. Every point and every fit is
 * printed. The loop stops when fewer than Nhour values remain.
 */
int main(void)
{
    /*
     * NOTE(review): 23 values are listed here, while the array in the
     * question also has 69.17 after 76.26 -- confirm the omission is intended.
     */
    float pval[]={
        76.26, 68.68, 71.49, 73.08, 72.99, 70.36, 57.82, 58.98,
        69.71, 70.43, 77.53, 80.77, 70.30, 70.50, 70.79, 75.58,
        76.88, 80.20, 77.69, 80.80, 70.50, 85.27, 75.25,
        };
    const int Nhour = 5;
    const int MAX_HOUR = sizeof(pval)/sizeof(pval[0]);
    const int size_hour = 7;
    float xs[6];            /* regression abscissae: slot 0 plus Nhour values */
    float ys[6];            /* regression ordinates: ref_avg plus Nhour values */
    float slope;
    float intercept;
    float total = 0.0;
    float ref_avg;
    int k = 0;
    int pass = 0;

    /* Reference value: mean of the leading size_hour samples. */
    while (k < size_hour)
    {
        total += pval[k];
        k++;
    }
    ref_avg = total / size_hour;
    printf("ref avg = %5.2f\n", ref_avg);

    /* One regression per window start, while a full window still fits. */
    while (pass + Nhour <= MAX_HOUR)
    {
        ys[0] = ref_avg;
        xs[0] = pass + 1;
        printf("pass %d\ncalc_y[0] = %5.2f, calc_x[0] = %5.2f\n",
               pass, ys[0], xs[0]);

        for (int offset = 0; offset < Nhour; offset++)
        {
            const int slot = offset + 1;    /* slot 0 holds the reference */
            const int n = pass + offset;    /* index into pval */

            ys[slot] = pval[n];
            xs[slot] = n + 2;
            printf("calc_y[%d] = %5.2f, calc_x[%d] = %5.2f, n = %2d\n",
                   slot, ys[slot], slot, xs[slot], n);
        }

        linear_regression(xs, ys, Nhour+1, &slope, &intercept);
        printf("Slope= %5.2f, intercept = %5.2f\n", slope, intercept);
        pass++;
    }

    return 0;
}

/*
 * Simple linear regression over the n points (x[i], y[i]).
 *
 * Stores the slope in *beta1 and the intercept in *beta0. When the x values
 * have zero spread both outputs are set to 0. Requires n > 1 (asserted).
 */
void linear_regression(const float *x, const float *y, const int n, float *beta1, float *beta0)
{
    float sx = 0.0;
    float sy = 0.0;
    float sxx = 0.0;
    float sxy = 0.0;
    int k = 0;

    assert(n > 1);

    /* Accumulate the four sums the closed-form solution needs. */
    while (k < n)
    {
        sx += x[k];
        sy += y[k];
        sxx += (x[k] * x[k]);
        sxy += (x[k] * y[k]);
        k++;
    }

    float spread = (sxx - ((sx * sx) / n));

    /* All x equal: the slope is undefined, report zeros instead. */
    if (spread == 0.0)
    {
        *beta1 = 0.0;
        *beta0 = 0.0;
        return;
    }

    *beta1 = (sxy - ((sx * sy) / n)) /  spread;
    *beta0 = (sy - ((*beta1) * sx)) / n;
}
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜