A simple MPI program
I would appreciate it if somebody could tell me why this simple MPI send-and-receive code doesn't run on two processors when n = 40 (at line 20), but works for n <= 30. In other words, if the message size goes beyond a specific threshold (which is not that large, roughly a 1-D array of size 8100), MPI deadlocks.
#include "mpi.h"
#include "stdio.h"
#include "stdlib.h"
#include "iostream"
#include "math.h"
using namespace std;
int main(int argc, char *argv[])
{
int processor_count, processor_rank;
double *buff_H, *buff_send_H;
int N_pa_prim1, l, n, N_p0;
MPI_Status status;
MPI_Init (&argc, &argv);
MPI_Comm_size (MPI_COMM_WORLD, &processor_count);
MPI_Comm_rank (MPI_COMM_WORLD, &processor_rank);
N_pa_prim1=14; l=7; n=40; N_p0=7;
buff_H = new double [n*n*N_p0+1]; //Receive buffer allocation
buff_send_H = new double [n*n*N_p0+1]; //Send buffer allocation
for (int j = 0; j < n*n*N_p0+1; j++)
buff_send_H[j] = 1e-8*rand();
if (process开发者_JS百科or_rank == 0)
MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD);
else if(processor_rank == 1)
MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD);
MPI_Recv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &status);
cout << "Received successfully by " << processor_rank << endl;
MPI_Finalize();
return 0;
}
The deadlocking is correct behaviour; you have a deadlock in your code.
The MPI Specification allows MPI_Send to behave as MPI_Ssend -- that is, to block. A blocking communication primitive does not return until the communication has "completed" in some sense, which (in the case of a blocking send) probably means the receive has started.
Your code looks like:
If Processor 0:
    Send to processor 1
If Processor 1:
    Send to processor 0
Receive
That is -- the receive doesn't start until the sends have completed. You're sending, but the sends will never return, because no one is receiving! (The fact that this works for small messages is an implementation artifact: most MPI implementations use a so-called "eager protocol" for "small enough" messages, but this can't be counted on in general.)
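One quick way to confirm the diagnosis (a small check, not part of the original program): swap MPI_Send for MPI_Ssend, which never takes the eager path. If the logic above is the problem, this version should hang for every message size, not just large ones:
// MPI_Ssend always waits for the matching receive to be posted, so this
// modified exchange should hang even for small n if the diagnosis is right.
if (processor_rank == 0)
    MPI_Ssend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD);
else if (processor_rank == 1)
    MPI_Ssend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD);
MPI_Recv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &status);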
Note that there are other logic errors here, too -- this program will also deadlock for more than 2 processors, as processors of rank >= 2 will be waiting for a message which never comes.
You can fix your program by alternating sends and receives by rank:
if (processor_rank == 0) {
    MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD);
    MPI_Recv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &status);
} else if (processor_rank == 1) {
    MPI_Recv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &status);
    MPI_Send(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD);
}
or by using MPI_Sendrecv, which is a single blocking operation that combines the send and the receive, rather than a blocking send followed by a blocking receive:
int sendto;
if (processor_rank == 0)
    sendto = 1;
else if (processor_rank == 1)
    sendto = 0;

if (processor_rank == 0 || processor_rank == 1) {
    MPI_Sendrecv(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, sendto, 163,
                 buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163,
                 MPI_COMM_WORLD, &status);
}
Or by using non-blocking sends and receives:
MPI_Request reqs[2];
MPI_Status statuses[2];

if (processor_rank == 0) {
    MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD, &reqs[0]);
} else if (processor_rank == 1) {
    MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD, &reqs[0]);
}

if (processor_rank == 0 || processor_rank == 1)
    MPI_Irecv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &reqs[1]);

MPI_Waitall(2, reqs, statuses);
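As an aside, a minimal sketch of one way to keep the non-blocking version from tripping over uninitialized requests on ranks >= 2 (assuming you keep the same buffers and tag): start every request as MPI_REQUEST_NULL, which MPI_Waitall simply skips:
// Null requests complete immediately, so ranks that post nothing can still
// call MPI_Waitall safely.
MPI_Request reqs[2] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL };
MPI_Status statuses[2];

if (processor_rank == 0)
    MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD, &reqs[0]);
else if (processor_rank == 1)
    MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD, &reqs[0]);

if (processor_rank == 0 || processor_rank == 1)
    MPI_Irecv(buff_H, n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &reqs[1]);

MPI_Waitall(2, reqs, statuses);   // no-op entries for ranks that posted nothing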
Thank you, Jonathan, for your help. I have chosen the third solution and written code similar to yours, except for adding "for" loops in order to send a number of messages. This time it doesn't deadlock; however, the processors keep receiving only the last message. (Since the messages are long, I've only printed their last elements to check consistency.)
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <math.h>
using namespace std;

int main(int argc, char *argv[])
{
    int processor_count, processor_rank;

    // Initialize MPI
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &processor_count);
    MPI_Comm_rank(MPI_COMM_WORLD, &processor_rank);

    double **buff_H, *buff_send_H;
    int N_pa_prim1, l, n, N_p0, count, temp;
    N_pa_prim1 = 5;  l = 7;  n = 50;  N_p0 = 7;

    MPI_Request reqs[2*N_pa_prim1];     // one request per send plus one per receive
    MPI_Status statuses[2*N_pa_prim1];

    buff_H = new double *[N_pa_prim1];  // Receive buffer allocation
    for (int i = 0; i < N_pa_prim1; i++)
        buff_H[i] = new double [n*n*N_p0+1];
    buff_send_H = new double [n*n*N_p0+1];  // Send buffer allocation

    if (processor_rank == 0) {
        for (int i = 0; i < N_pa_prim1; i++) {
            for (int j = 0; j < n*n*N_p0+1; j++)
                buff_send_H[j] = 2.0325e-8*rand();
            cout << processor_rank << "\t" << buff_send_H[n*n*N_p0] << "\t" << "Send" << "\t" << endl;
            MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 1, 163, MPI_COMM_WORLD, &reqs[i]);
        }
    }
    else if (processor_rank == 1) {
        for (int i = 0; i < N_pa_prim1; i++) {
            for (int j = 0; j < n*n*N_p0+1; j++)
                buff_send_H[j] = 3.5871e-8*rand();
            cout << processor_rank << "\t" << buff_send_H[n*n*N_p0] << "\t" << "Send" << "\t" << endl;
            MPI_Isend(buff_send_H, n*n*N_p0+1, MPI_DOUBLE, 0, 163, MPI_COMM_WORLD, &reqs[i]);
        }
    }

    for (int i = 0; i < N_pa_prim1; i++)
        MPI_Irecv(buff_H[i], n*n*N_p0+1, MPI_DOUBLE, MPI_ANY_SOURCE, 163, MPI_COMM_WORLD, &reqs[N_pa_prim1+i]);

    MPI_Waitall(2*N_pa_prim1, reqs, statuses);

    for (int i = 0; i < N_pa_prim1; i++)
        cout << processor_rank << "\t" << buff_H[i][n*n*N_p0] << "\t" << "Receive" << endl;

    MPI_Finalize();
    return 0;
}
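A guess at what is happening, not a certainty: MPI_Isend only starts a send, and the send buffer must not be modified until the corresponding request completes, yet each loop iteration refills the same buff_send_H before MPI_Waitall is reached. A minimal sketch of one way around that, using a separate send buffer per outstanding message (the name send_bufs is illustrative, and the destination assumes exactly two ranks as above):
// Sketch only: give each in-flight MPI_Isend its own buffer so nothing is
// overwritten before MPI_Waitall completes.
double **send_bufs = new double *[N_pa_prim1];
for (int i = 0; i < N_pa_prim1; i++) {
    send_bufs[i] = new double [n*n*N_p0+1];
    for (int j = 0; j < n*n*N_p0+1; j++)
        send_bufs[i][j] = 2.0325e-8*rand();
    cout << processor_rank << "\t" << send_bufs[i][n*n*N_p0] << "\t" << "Send" << endl;
    MPI_Isend(send_bufs[i], n*n*N_p0+1, MPI_DOUBLE,
              1 - processor_rank,      // the other of the two ranks
              163, MPI_COMM_WORLD, &reqs[i]);
}
Another option is to call MPI_Wait on reqs[i] before reusing buff_send_H for the next message, at the cost of making the sends effectively sequential.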