
Problem integrating sequential code into parallel MPI code

I tried to integrate an edge-detection Laplacian operator into my previous MPI code. The problem I have now is doing the edge detection on a 1D array after the data is scattered. I get an output, but the colours are inverted compared to what the image is supposed to look like. Can anybody help me solve this problem? This is the parallel code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include <math.h>

#define SIZE_X 640
#define SIZE_Y 480
#define   smooth  3

int mod(int z, int l);


/****************** Main Program ***********************/

int main(int argc, char **argv)
{
FILE *FR,*FW;
int ierr;
int rank, size, a[100000], sum, m, n;
int ncells;
int greys[SIZE_X][SIZE_Y];
int rows,cols, maxval;
int mystart, myend, myncells;
const int IONODE=0;
int *disps, *counts, *mydata;
int *data;
int i,j,temp1;
char dummy[50]="";
int csx,sum1,sum2,k,l,x;//w1[3][3],w2[3][3]; 

  //Laplacian Operator
static int w1[3][3]={
{0,-1,0},
{-1,4,-1},
{0,-1,0}
};

static int w2[3][3]={
{0,-1,0},
{-1,4,-1},
{0,-1,0}
};

/****************** Initialize MPI ***********************/



ierr = MPI_Init(&argc, &argv);
if (argc != 3) {
    fprintf(stderr,"Usage: %s infile outfile\n",argv[0]);
    fprintf(stderr,"outputs the negative of the input file.\n");
    return -1;
}            

ierr  = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
ierr = MPI_Comm_size(MPI_COMM_WORLD, &size);
if (ierr) {
    fprintf(stderr,"Catastrophic MPI problem; exiting\n");
    MPI_Abort(MPI_COMM_WORLD,1);
}




  /****************** Master open image file for read  ***********************/


if (rank == IONODE) {
    rows=SIZE_X;
    cols=SIZE_Y;
    maxval=255;
    FR=fopen(argv[1], "r+");


  /****************** Read the header part of the image ***********************/

    fgets(dummy,50,FR);
    do{  fgets(dummy,50,FR); } while(dummy[0]=='#');
    fgets(dummy,50,FR);

    for (j = 0; j < cols; j++)
        for (i = 0; i < rows; i++)
        {
            fscanf(FR,"%d",&temp1);
            greys[i][j] = temp1;
        }


 /****************** Read pixel values ***********************/

ncells = rows*cols;
disps = (int *)malloc(size * sizeof(int));
counts= (int *)malloc(size * sizeof(int));
data = &(greys[0][0]); /* we know all the data is contiguous */
}

// Time every processor 
//MPI_Barrier(MPI_COMM_WORLD);
//p_time = MPI_Wtime();


 /****************** Everyone calculate their number of cells ***********************/

ierr = MPI_Bcast(&ncells, 1, MPI_INT, IONODE, MPI_COMM_WORLD);
myncells = ncells/size;
mystart = rank*myncells;
myend   = mystart + myncells - 1;
if (rank == size-1) myend = ncells-1;
myncells = (myend-mystart)+1;
mydata = (int *)malloc(myncells * sizeof(int));


 /****************** Assemble the list of counts. Might not be equal if don't divide evenly.***********************/

ierr = MPI_Gather(&myncells, 1, MPI_INT, counts, 1, MPI_INT, IONODE, MPI_COMM_WORLD);
if (rank == IONODE) {
    disps[0] = 0;
    for (i=1; i<size; i++) {
        disps[i] = disps[i-1] + counts[i-1];
    }
}


/****************** Scatter the data to all processor ***********************/

ierr = MPI_Scatterv(data, counts, disps, MPI_INT, mydata, myncells, MPI_INT, IONODE, MPI_COMM_WORLD);


/****************** All processor do AVERAGE FILTERING ***********************/

csx=smooth/2; 
for (i=0; i<myncells; i++)
{
 sum1=0;
 sum2=0;
 for(k=0;k<smooth;k++)
 {
   for(l=0;l<smooth;l++) 
   {
     x=i+k-csx; 
     sum1+=w1[k][l]* mydata[mod(x,myncells)];
     sum2+=w2[k][l]* mydata[mod(x,myncells)];    
   }
 }

 if((abs(sum1)+abs(sum2))>125)
   mydata[i]=255;
 else
   mydata[i]=0;
}


/******************  Gather the data from all processor ***********************/

ierr = MPI_Gatherv(mydata, myncells, MPI_INT, data, counts, disps, MPI_INT, IONODE, MPI_COMM_WORLD);

//MPI_Barrier(MPI_COMM_WORLD);  
//p2_time = MPI_Wtime();
//printf("\nProcessor %d = %g microseconds\n", rank, (p2_time*1000000)-(p_time*1000000));       


/******************  Master open image file for write ***********************/  

if (rank == IONODE){
    FW=fopen(argv[2], "w");
    fprintf(FW,"P2\n%d %d\n255\n",rows,cols);    
    for(j=0;j<cols;j++)
        for(i=0;i<rows;i++)
            fprintf(FW,"%d ", greys[i][j]);

    }

free(mydata);
if (rank == IONODE) {
free(counts);
free(disps);

}

if (rank == IONODE) {
    fclose(FR);
    fclose(FW);

    }



MPI_Finalize();

return 0;

 }


//Sub routine

/* periodic extension (outside of the image frame) */
int mod(int z, int l)
{
    if( z >= 0 && z < l ) return z;
    else if( z < 0 ) return (z+l);
    else if( z > (l-1) ) return (z-l);
    return 0;
}

This is the sequential code that I need to integrate into the program above:

#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#include <time.h>
#include <math.h>

#define size_x 203
#define size_y 152

typedef struct
{
  int imagesize_x, imagesize_y;
  int **pixel;
} image_t;

image_t allocate_image(const int imagesize_x, const int imagesize_y);
int mod(int z, int l);

void main()
{
  image_t image_in, image_out;

  int m, n, temp;
  int smooth, csx, csy;
  int k, l, x, y, sum1, sum2;

  FILE *cpp1, *cpp2;

  char dummy[50]="";

  //Laplacian Operator
  static int w1[3][3]={
    {0,-1,0},
    {-1,4,-1},
    {0,-1,0}
  };

  static int w2[3][3]={
    {0,-1,0},
    {-1,4,-1},
    {0,-1,0}
  };

  cpp1=fopen("e:\\input_image\\A.pgm", "r+");
  cpp2=fopen("e:\\output_image\\edge_lap.pgm", "w+");

  fgets(dummy,50,cpp1);
  do{
    fgets(dummy,50,cpp1);
  }while(dummy[0]=='#');
  fgets(dummy,50,cpp1);

  fprintf(cpp2,"P2\n%d %d\n255\n",(size_x),(size_y));

  image_in  = allocate_image(size_x,size_y);
  image_out = allocate_image(size_x,size_y);

  //Reading Input Image
  for (n = 0; n < size_y; n++)
  {
    for (m = 0; m < size_x; m++)
    {
      fscanf(cpp1,"%d",&temp);
      image_in.pixel[m][n] = temp;
    }
  }

  /* Edge Detection */
  smooth=3;
  csx=smooth/2; csy=smooth/2;

  //Edge detection
  for (n = 0; n < size_y; n++) {
    for (m = 0; m < size_x; m++) {

      sum1=0; sum2=0;

      for(k=0;k<smooth;k++){
        for(l=0;l<smooth;l++) {

          x=m+k-csx; y=n+l-csy;

          sum1+=w1[k][l]* image_in.pixel[mod(x,size_x)][mod(y,size_y)];
          sum2+=w2[k][l]* image_in.pixel[mod(x,size_x)][mod(y,size_y)];
        }
      }

      if((fabs(sum1)+fabs(sum2))>125)
        image_out.pixel[m][n]=255;
      else
        image_out.pixel[m][n]=0;
    }
  }

  //Writing Edge Detected Image
  for (n = 0; n < size_y; n++)
  {
    for (m = 0; m < size_x; m++)
    {
      fprintf(cpp2,"%d ",image_out.pixel[m][n]);
    }
  }
}

image_t allocate_image(const int imagesize_x, const int imagesize_y)
{
  image_t result;
  int x = 0, y = 0;

  result.imagesize_x = imagesize_x;
  result.imagesize_y = imagesize_y;

  result.pixel = (int **) calloc(imagesize_x, sizeof(int*));

  for(x = 0; x < imagesize_x; x++)
  {
    result.pixel[x] = (int*) calloc(imagesize_y, sizeof(int));

    for(y = 0; y < imagesize_y; y++)
    {
      result.pixel[x][y] = 0;
    }
  }

  return result;
}

int mod(int z, int l)
{
  if( z >= 0 && z < l ) return z;
  else if( z < 0 ) return (z+l);
  else if( z > (l-1) ) return (z-l);
}

This is the input image: http://orion.math.iastate.edu/burkardt/data/pgm/balloons.pgm

I run it with: mpirun -np 10 ./mysource balloons.pgm output.pgm


So in your previous question, where you just needed to invert the image, the 2d structure of the image didn't matter and you could just divide the image into (number of pixels)/(number of processes) chunks, and each process could invert its own pixels.

Here, though, the 2-d structure does matter; to apply the stencil, you need to have all of the neighbouring pixels. And on the edge of the image, you need the data from the other edge (in physics, we call this "periodic boundary conditions" -- the image wraps around like a torus).

So we need to decompose the data in a way that maintains the 2d structure of the data. We could do a full 2d decomposition; so if there were 6 processes, the image would be broken up like

+---+---+---+
| 0 | 1 | 2 |
+---+---+---+
| 3 | 4 | 5 |
+---+---+---+

(Here it is 2011 and I'm still doing ascii art to communicate things over computers. How did that happen?)

but it's simpler for now to do a 1-d decomposition; that is, we keep the 2d structure of the data but we only chop up the data along one dimension. In C, it's much easier to do this along rows, so the data pieces are contiguous in memory:

-----1------
-----2------
-----3------
-----4------
-----5------
-----6------

By far the easiest way to do this is to pad the number of rows in the original image so that it divides evenly by the number of tasks.

Now, also, if one task is in charge of applying the stencil to row zero, it needs data from row (nrows-1), and vice versa; similarly for column 0 and column (ncols-1). So we'll pad the array by two more rows and columns, and copy the data from column (ncols-1) into column 0 when we read the data, etc.

So in that case, reading your image changes to something like this:

fgets(dummy,50,FR);
do{  fgets(dummy,50,FR); } while(dummy[0]=='#');
sscanf(dummy,"%d %d",&cols, &rows);
fgets(dummy,50,FR);


nrowspertask = (rows/size);
if (nrowspertask*size < rows) nrowspertask++;
int totrows = nrowspertask*size;

/* pad the number of rows so it divides evenly by # of tasks */
/* and then add 2 rows, 2 cols, for "wraparound" at edges */

image_t image;
image = allocate_image( cols+2, totrows+2 );

/****************** Read pixel values ***********************/

for (j = 0; j <cols; j++)
    for (i = 0; i <rows; i++)
    {
        fscanf(FR,"%d",&temp1);
        image.pixel[j+1][i+1] = temp1;
    }

/* copy bottom row to top, top row to bottom */
for (j=1; j<cols+1; j++) {
    image.pixel[j][0]      = image.pixel[j][rows];
    image.pixel[j][rows+1] = image.pixel[j][1];
}

/* copy leftmost col to right, rightmost col to left */
for (i=1; i<rows+1; i++) {
    image.pixel[0][i]      = image.pixel[cols][i];
    image.pixel[cols+1][i] = image.pixel[1][i];
}

Now comes the decomposition. Each task is going to be in charge of nrowspertask rows; but it needs the row above and below to do this job, and the padding on the sides, too. So each task is going to have to receive (nrowspertask+2)*(cols+2) ints, starting at &(image.pixel[rank*(nrowspertask)][0]). (Note that this relies on the whole pixel array being one contiguous block, so allocate_image needs to allocate it in one piece rather than with a separate calloc per row as the serial version does.) We can still use MPI_Scatterv() for this - we set up the counts and disps as

data = &(image.pixel[0][0]); /* we know all the data is contiguous */
disps = (int *)malloc(size * sizeof(int));
counts= (int *)malloc(size * sizeof(int));

for (i=0;i<size;i++) {
    counts[i]=(nrowspertask+2)*(cols+2);
    disps[i]=i*(nrowspertask)*(cols+2);
}

and then everyone gets their data:

locimage = allocate_image(cols+2, nrowspertask+2);
ierr = MPI_Scatterv(data, counts, disps, MPI_INT,&(locimage.pixel[0][0]),
                         (nrowspertask+2)*(cols+2), MPI_INT, IONODE, MPI_COMM_WORLD);

Now you do your filter very much like in the serial case, except you don't have to worry about mods because you have padded the data with the info you need.
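For concreteness, the local filter might look something like the sketch below. Here locout is a hypothetical second image_t buffer for the output; writing into a separate buffer means the stencil always reads the original scattered values rather than pixels you've already overwritten. The indexing follows the same pixel[column][row] convention as the read loop above, with local rows 1..nrowspertask being the "real" data and rows 0 and nrowspertask+1 being the padding.

/* sketch only: locout is a hypothetical output buffer, same size as locimage */
locout = allocate_image(cols+2, nrowspertask+2);
csx = smooth/2;

for (j=1; j<=cols; j++) {                 /* real columns; 0 and cols+1 are padding */
    for (i=1; i<=nrowspertask; i++) {     /* real rows; 0 and nrowspertask+1 are padding */
        sum1 = 0; sum2 = 0;
        for (k=0; k<smooth; k++) {
            for (l=0; l<smooth; l++) {
                /* the neighbours always exist thanks to the padding, so no mod() */
                sum1 += w1[k][l]*locimage.pixel[j+k-csx][i+l-csx];
                sum2 += w2[k][l]*locimage.pixel[j+k-csx][i+l-csx];
            }
        }
        if ((abs(sum1)+abs(sum2)) > 125)
            locout.pixel[j][i] = 255;
        else
            locout.pixel[j][i] = 0;
    }
}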

You gatherv the data back into the array very similarly, but I'll leave you to figure that one out; you only send back the "real" rows, not the padded rows, or else you would be overwriting data.
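If you want a starting point, here is one rough sketch of that gather. The caveat is that, like the scatter above, it assumes the pixel data sits in one contiguous block laid out as consecutive padded rows of (cols+2) ints; gathercounts and gatherdisps are hypothetical new arrays, and rank i's first real row lives at padded global row i*nrowspertask+1.

/* sketch only: gathercounts/gatherdisps are hypothetical */
int *gathercounts = (int *)malloc(size * sizeof(int));
int *gatherdisps  = (int *)malloc(size * sizeof(int));

for (i=0; i<size; i++) {
    gathercounts[i] = nrowspertask*(cols+2);           /* only the real rows */
    gatherdisps[i]  = (i*nrowspertask + 1)*(cols+2);   /* skip the global top padding row */
}

/* send starting one padded row past the start of the local block,
   i.e. from the first "real" local row */
ierr = MPI_Gatherv(&(locout.pixel[0][0]) + (cols+2), nrowspertask*(cols+2), MPI_INT,
                   data, gathercounts, gatherdisps, MPI_INT,
                   IONODE, MPI_COMM_WORLD);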
