Problem integrating sequential code into parallel MPI code
I tried to integrate an edge-detection Laplacian operator into my previous MPI code. The problem I have now is doing the edge detection on the 1-d array after the data is scattered. I get output, but the colours are inverted from what the image is supposed to look like. Can anybody help me solve this problem? This is the parallel code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include <math.h>
#define SIZE_X 640
#define SIZE_Y 480
#define smooth 3
int mod(int z, int l);
/****************** Main Program ***********************/
int main(int argc, char **argv)
{
FILE *FR,*FW;
int ierr;
int rank, size;
int ncells;
int greys[SIZE_X][SIZE_Y];
int rows,cols, maxval;
int mystart, myend, myncells;
const int IONODE=0;
int *disps, *counts, *mydata;
int *data;
int i,j,temp1;
char dummy[50]="";
int csx, sum1, sum2, k, l, x;
//Laplacian Operator
static int w1[3][3]={
{0,-1,0},
{-1,4,-1},
{0,-1,0}
};
static int w2[3][3]={
{0,-1,0},
{-1,4,-1},
{0,-1,0}
};
/****************** Initialize MPI ***********************/
ierr = MPI_Init(&argc, &argv);
if (argc != 3) {
fprintf(stderr,"Usage: %s infile outfile\n",argv[0]);
fprintf(stderr,"outputs the edge-detected version of the input file.\n");
MPI_Finalize();
return -1;
}
ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
ierr = MPI_Comm_size(MPI_COMM_WORLD, &size);
if (ierr) {
fprintf(stderr,"Catastrophic MPI problem; exiting\n");
MPI_Abort(MPI_COMM_WORLD,1);
}
/****************** Master open image file for read ***********************/
if (rank == IONODE) {
rows=SIZE_X;
cols=SIZE_Y;
maxval=255;
FR=fopen(argv[1], "r+");
/****************** Read the header part of the image ***********************/
fgets(dummy,50,FR);
do{ fgets(dummy,50,FR); } while(dummy[0]=='#');
fgets(dummy,50,FR);
for (j = 0; j <cols; j++)
for (i = 0; i <rows; i++)
{
fscanf(FR,"%d",&temp1);
greys[i][j] = temp1;
}
/****************** Read pixel values ***********************/
ncells = rows*cols;
disps = (int *)malloc(size * sizeof(int));
counts= (int *)malloc(size * sizeof(int));
data = &(greys[0][0]); /* we know all the data is contiguous */
}
// Time every processor
//MPI_Barrier(MPI_COMM_WORLD);
//p_time = MPI_Wtime();
/****************** Everyone calculates their number of cells ***********************/
ierr = MPI_Bcast(&ncells, 1, MPI_INT, IONODE, MPI_COMM_WORLD);
myncells = ncells/size;
mystart = rank*myncells;
myend = mystart + myncells - 1;
if (rank == size-1) myend = ncells-1;
myncells = (myend-mystart)+1;
mydata = (int *)malloc(myncells * sizeof(int));
/****************** Assemble the list of counts; they might not be equal if the cells don't divide evenly ***********************/
ierr = MPI_Gather(&myncells, 1, MPI_INT, counts, 1, MPI_INT, IONODE, MPI_COMM_WORLD);
if (rank == IONODE) {
disps[0] = 0;
for (i=1; i<size; i++) {
disps[i] = disps[i-1] + counts[i-1];
}
}
/****************** Scatter the data to all processors ***********************/
ierr = MPI_Scatterv(data, counts, disps, MPI_INT, mydata, myncells, MPI_INT, IONODE, MPI_COMM_WORLD);
/****************** All processors do EDGE DETECTION ***********************/
csx=smooth/2;
for (i=0; i<myncells; i++)
{
sum1=0;
sum2=0;
for(k=0;k<smooth;k++)
{
for(l=0;l<smooth;l++)
{
x=i+k-csx;
sum1+=w1[k][l]* mydata[mod(x,myncells)];
sum2+=w2[k][l]* mydata[mod(x,myncells)];
}
}
if((abs(sum1)+abs(sum2))>125)
mydata[i]=255;
else
mydata[i]=0;
}
/****************** Gather the data from all processors ***********************/
ierr = MPI_Gatherv(mydata, myncells, MPI_INT, data, counts, disps, MPI_INT, IONODE, MPI_COMM_WORLD);
//MPI_Barrier(MPI_COMM_WORLD);
//p2_time = MPI_Wtime();
//printf("\nProcessor %d = %g microseconds\n", rank, (p2_time*1000000)-(p_time*1000000));
/****************** Master open image file for write ***********************/
if (rank == IONODE){
FW=fopen(argv[2], "w");
fprintf(FW,"P2\n%d %d\n255\n",rows,cols);
for(j=0;j<cols;j++)
for(i=0;i<rows;i++)
fprintf(FW,"%d ", greys[i][j]);
}
free(mydata);
if (rank == IONODE) {
free(counts);
free(disps);
}
if (rank == IONODE) {
fclose(FR);
fclose(FW);
}
MPI_Finalize();
return 0;
}
//Sub routine
/* periodic extension (outside of the image frame) */
int mod(int z, int l)
{
if( z >= 0 && z < l ) return z;
else
if( z < 0) return (z+l);
else
if( z > (l-1)) return (z-l);
return 0;
}
This is the sequential code that I need to integrate into the program above:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#define size_x 203
#define size_y 152
typedef struct
{
int imagesize_x, imagesize_y;
int **pixel;
}image_t;
image_t allocate_image(const int imagesize_x, const int imagesize_y);
int mod(int z, int l);
int main(void)
{
image_t image_in,image_out;
int m,n, temp;
int smooth,csx,csy;
int k,l,x,y,sum1,sum2;
FILE *cpp1,*cpp2;
char dummy[50]="";
//Laplacian Operator
static int w1[3][3]={
{0,-1,0},
{-1,4,-1},
{0,-1,0}
};
static int w2[3][3]={
{0,-1,0},
{-1,4,-1},
{0,-1,0}
};
cpp1=fopen("e:\\input_image\\A.pgm", "r+");
cpp2=fopen("e:\\output_image\\edge_lap.pgm", "w+");
fgets(dummy,50,cpp1);
do{
fgets(dummy,50,cpp1);
}while(dummy[0]=='#');
fgets(dummy,50,cpp1);
fprintf(cpp2,"P2\n%d %d\n255\n",(size_x),(size_y));
image_in = allocate_image(size_x,size_y);
image_out = allocate_image(size_x,size_y);
//Reading Input Image
for (n = 0; n < size_y; n++)
{
for (m = 0; m <size_x; m++)
{
fscanf(cpp1,"%d",&temp);
image_in.pixel[m][n] = temp;
}
}
/* Edge Detection */
smooth=3;
csx=smooth/2; csy=smooth/2;
//Edge detection
for (n = 0; n < size_y; n++) {
for (m = 0; m < size_x; m++) {
sum1=0;sum2=0;
for(k=0;k<smooth;k++){
for(l=0;l<smooth;l++) {
x=m+k-csx; y=n+l-csy;
sum1+=w1[k][l]* image_in.pixel[mod(x,size_x)][mod(y,size_y)];
sum2+=w2[k][l]* image_in.pixel[mod(x,size_x)][mod(y,size_y)];
}
}
if((abs(sum1)+abs(sum2))>125)
image_out.pixel[m][n]=255;
else
image_out.pixel[m][n]=0;
}
}
//Writing Edge Detected Image
for (n = 0; n < size_y; n++)
{
for (m = 0; m <size_x; m++)
{
fprintf(cpp2,"%d ",image_out.pixel[m][n]);
}
}
fclose(cpp1);
fclose(cpp2);
return 0;
}
image_t allocate_image(const int imagesize_x, const int imagesize_y)
{
image_t result;
int x = 0, y = 0;
result.imagesize_x = imagesize_x;
result.imagesize_y = imagesize_y;
result.pixel =(int **) calloc(imagesize_x, sizeof(int*));
for(x = 0; x < imagesize_x; x++)
{
result.pixel[x] =(int*) calloc(imagesize_y, sizeof(int));
for(y = 0; y < imagesize_y; y++)
{
result.pixel[x][y] = 0;
}
}
return result;
}
int mod(int z, int l)
{
if( z >= 0 && z < l ) return z;
else if( z < 0 ) return (z+l);
else return (z-l);
}
This is the input image http://orion.math.iastate.edu/burkardt/data/pgm/balloons.pgm
I run it with: mpirun -np 10 ./mysource balloons.pgm output.pgm
So in your previous question where you just needed to invert the image, the 2d structure of the image didn't matter and you could just divide the image into (number of pixels)/(number of processes) chunks and each could invert their pixels.
Here, though, the 2-d structure does matter; to apply the stencil, you need to have all of the neighbouring pixels. And on the edge of the image, you need the data from the other edge (in physics, we call this "periodic boundary conditions" -- the image wraps around like a torus).
So we need to decompose the data in a way that maintains the 2d structure of the data. We could do a full 2d decomposition; so if there were 6 processes, the image would be broken up like
+---+---+---+
| 0 | 1 | 2 |
+---+---+---+
| 3 | 4 | 5 |
+---+---+---+
(Here it is 2011 and I'm still doing ascii art to communicate things over computers. How did that happen?)
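(If you ever do want the full 2-d version, MPI can set up that grid of tasks for you; a minimal sketch, purely for context -- nothing below depends on it:)
/* Sketch only: let MPI pick a 2-d grid of tasks and build a
 * communicator with periodic (torus-like) boundaries. */
int dims[2] = {0, 0}, periods[2] = {1, 1}, coords[2];
MPI_Comm cart;
MPI_Dims_create(size, 2, dims);              /* e.g. size=6 -> dims={3,2} */
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &cart);
MPI_Cart_coords(cart, rank, 2, coords);      /* this task's (row, col) in the grid */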
but it's simpler for now to do a 1-d decomposition; that is, we keep the 2d structure of the data but we only chop up the data along one dimension. In C, it's much easier to do this along rows, so the data pieces are contiguous in memory:
-----1------
-----2------
-----3------
-----4------
-----5------
-----6------
By far the easiest way to do this is to pad the number of rows in the original image so that it divides evenly by the number of tasks.
Now, also, if one task is in charge of applying the stencil to row zero, it needs data from row (nrows-1), and vice versa; similarly for column 0 and column (ncols-1). So we'll pad the array by two more rows and columns, and copy data from column (ncols-1) into column 0 when we read the data, etc.
So in that case, reading your image changes to something like this:
fgets(dummy,50,FR);
do{ fgets(dummy,50,FR); } while(dummy[0]=='#');
sscanf(dummy,"%d %d",&cols, &rows);
fgets(dummy,50,FR);
nrowspertask = (rows/size);
if (nrowspertask*size < rows) nrowspertask++;
int totrows = nrowspertask*size;
/* pad the number of rows so it divides evenly by # of tasks */
/* and then add 2 rows, 2 cols, for "wraparound" at edges */
image_t image;
image = allocate_image( totrows+2, cols+2 );  /* pixel[row][col]: each row is cols+2 ints, contiguous */
/****************** Read pixel values ***********************/
for (i = 0; i < rows; i++)
for (j = 0; j < cols; j++)
{
fscanf(FR,"%d",&temp1);
image.pixel[i+1][j+1] = temp1;
}
/* copy bottom row into the top halo, top row into the bottom halo */
for (j=1; j<cols+1; j++) {
image.pixel[0][j] = image.pixel[rows][j];
image.pixel[rows+1][j] = image.pixel[1][j];
}
/* copy rightmost col into the left halo, leftmost col into the right halo */
/* (the corners can be left empty: the Laplacian stencil has zero corner weights) */
for (i=1; i<rows+1; i++) {
image.pixel[i][0] = image.pixel[i][cols];
image.pixel[i][cols+1] = image.pixel[i][1];
}
Now comes the decomposition. Each task is going to be in charge of nrowspertask rows, but to apply the stencil it also needs the row above and below its chunk, plus the padding on the sides. So each task has to receive (nrowspertask+2)*(cols+2) ints, starting at &(image.pixel[rank*nrowspertask][0]). Note that the counts deliberately overlap: neighbouring tasks each get their own copy of the boundary rows they share. (For balloons.pgm on 10 tasks: rows=480, cols=640, so nrowspertask=48 and each task receives 50*642 ints.) We can still use MPI_Scatterv() for this; we set up the counts and disps as
data = &(image.pixel[0][0]); /* we know all the data is contiguous */
disps = (int *)malloc(size * sizeof(int));
counts= (int *)malloc(size * sizeof(int));
for (i=0;i<size;i++) {
counts[i]=(nrowspertask+2)*(cols+2);
disps[i]=i*(nrowspertask)*(cols+2);
}
and then everyone gets their data:
locimage = allocate_image(nrowspertask+2, cols+2);
ierr = MPI_Scatterv(data, counts, disps, MPI_INT, &(locimage.pixel[0][0]),
(nrowspertask+2)*(cols+2), MPI_INT, IONODE, MPI_COMM_WORLD);
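One caveat: passing &(image.pixel[0][0]) and &(locimage.pixel[0][0]) as MPI buffers only works if allocate_image puts all the pixels in one contiguous block; the row-by-row calloc version from your serial code does not do that. A minimal sketch of a contiguous variant (same image_t as before, just a different allocation strategy):
/* Contiguous allocate_image (sketch): one zeroed slab for all the
 * pixel data plus a table of row pointers into it, so that
 * &(result.pixel[0][0]) addresses the whole image in row-major order. */
image_t allocate_image(const int imagesize_x, const int imagesize_y)
{
    image_t result;
    int x;
    result.imagesize_x = imagesize_x;
    result.imagesize_y = imagesize_y;
    result.pixel = (int **)malloc(imagesize_x * sizeof(int *));
    result.pixel[0] = (int *)calloc((size_t)imagesize_x * imagesize_y, sizeof(int));
    for (x = 1; x < imagesize_x; x++)
        result.pixel[x] = result.pixel[0] + (size_t)x * imagesize_y;
    return result;
}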
Now you do your filter very much like in the serial case, except you don't have to worry about mods because you have padded the data with the info you need.
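To make that concrete, here is a minimal sketch of the local loop (locimage, nrowspertask and cols as above; locout is an assumed scratch image, since writing results back into locimage would corrupt the stencil for neighbouring pixels; the other names mirror your existing variables):
/* Local edge detection: rows 1..nrowspertask and cols 1..cols are
 * real data; rows 0 and nrowspertask+1 and cols 0 and cols+1 are
 * read-only halo, so no mod() is needed at the edges. */
int m, n, k, l, sum1, sum2;
int csx = smooth/2;                           /* = 1 for the 3x3 stencil */
image_t locout = allocate_image(nrowspertask+2, cols+2);
for (m = 1; m <= nrowspertask; m++)
    for (n = 1; n <= cols; n++) {
        sum1 = 0; sum2 = 0;
        for (k = 0; k < smooth; k++)
            for (l = 0; l < smooth; l++) {
                sum1 += w1[k][l] * locimage.pixel[m+k-csx][n+l-csx];
                sum2 += w2[k][l] * locimage.pixel[m+k-csx][n+l-csx];
            }
        locout.pixel[m][n] = (abs(sum1) + abs(sum2) > 125) ? 255 : 0;
    }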
You gatherv the data back into the array very similarly, but I'll leave you to figure that one out; you only send back the "real" rows, not the padded rows, or else you would be overwriting data.
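In case you want something to check your version against, one possible shape of that call under the layout above (reusing the root's counts/disps arrays from the scatter; only the root reads them):
/* Sketch: each task sends back its nrowspertask real rows, skipping
 * its local top halo row; the root lands them just past the global
 * top halo row, at the same row offsets used for the scatter. */
for (i = 0; i < size; i++) {
    counts[i] = nrowspertask * (cols+2);          /* no halo rows this time */
    disps[i]  = (i*nrowspertask + 1) * (cols+2);  /* +1 skips padded row 0 */
}
ierr = MPI_Gatherv(&(locout.pixel[1][0]), nrowspertask*(cols+2), MPI_INT,
                   data, counts, disps, MPI_INT, IONODE, MPI_COMM_WORLD);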