开发者

Compiling smallpt with OpenMP causes infinite loop at runtime

I'm currently looking at the smallpt code by Kevin Beason. I compiled the code exactly as it says on the tin, using g++ -O3 -fopenmp smallpt.cpp, and I'm running into what seems like either an infinite loop or a deadlock.

Compiling the code using just g++ -O3 smallpt.cpp produces the images seen on his page, but I can't get the OpenMP parallelization to work at all.

For reference, I'm compiling on a Windows 7 64-bit machine using Cygwin with GCC 4.5.0. The author himself has stated he's run the same exact code and has run into no issues whatsoever, but I can't get the program to actually exit when it's done tracing the image.

Could this be an issue with my particular compiler and environment, or am I doing something wrong here? Here's the particular snippet of code that's parallelized using OpenMP. I've only modified it with some minor formatting to make it more readable.


int main(int argc, char *argv[])
{
  int w=1024, h=768, samps = argc==2 ? atoi(argv[1])/4 : 1;

  Ray cam(Vec(50,52,295.6), Vec(0,-0.042612,-1).norm()); // cam pos, dir
  Vec cx=Vec(w*.5135/h);
  Vec cy=(cx%cam.d).norm()*.5135, r, *c=new Vec[w*h];

  #pragma omp parallel for schedule(dynamic, 1) private(r)       // OpenMP
  for (int y=0; y<h; y++)                       // Loop over image rows
  {
    fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samps*4,100.*y/(h-1));
    for (unsigned short x=0, Xi[3]={0,0,y*y*y}; x<w; x++)   // Loop cols
    {
      for (int sy=0, i=(h-y-1)*w+x; sy<2; sy++)     // 2x2 subpixel rows
      {
        for (int sx=0; sx<2; sx++, r=Vec())        // 2x2 subpixel cols
        {
          for (int s=0; s<samps; s++)
          {
            double r1=2*erand48(Xi), dx=r1<1 ? sqrt(r1)-1: 1-sqrt(2-r1);
            double r2=2*erand48(Xi), dy=r2<1 ? sqrt(r2)-1: 1-sqrt(2-r2);
            Vec d = cx*( ( (sx+.5 + dx)/2 + x)/w - .5) +
                    cy*( ( (sy+.5 + dy)/2 + y)/h - .5) + cam.d;
            r = r + radiance(Ray(cam.o+d*140,d.norm()),0,Xi)*(1./samps);
          } // Camera rays are pushed ^^^^^ forward to start in interior
          c[i] = c[i] + Vec(clamp(r.x),clamp(r.y),clamp(r.z))*.25;
        }
      }
    }
  }

  /*  PROBLEM HERE!
      The code never seems to reach here
      PROBLEM HERE!
  */
  FILE *f = fopen("image.ppm", "w");         // Write image to PPM file.
  fprintf(f, "P3\n%d %d\n%d\n", w, h, 255);
  for (int i=0; i<w*h; i++)
    fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z));
}

Here's the output that the program produces, when it runs to completion:

$ time ./a
Rendering (4 spp) 100.00%spp)  spp)   00..0026%%

The following is the most basic code that can reproduce the above behavior

#include <cstdio>
#include <cstdlib>
#include <cmath>

// Three-component double vector; a default-constructed Vector is all zeros.
struct Vector
{
  double x;
  double y;
  double z;

  Vector()
  {
    x = 0.0;
    y = 0.0;
    z = 0.0;
  }
};

// Scales x by 255 and truncates toward zero to produce an integer channel value.
int toInt(double x)
{
  const double scaled = x * 255.0;
  return (int)scaled;
}

// Limits x to the closed interval [0, 1]; values already inside pass through.
double clamp(double x)
{
  return (x < 0) ? 0.0
       : (x > 1) ? 1.0
       : x;
}

int main(int argc, char *argv[])
{
  int w = 1024;
  int h = 768;
  int samples = 1;

  Vector r, *c = new Vector[w * h];

  #pragma omp parallel for schedule(dynamic, 1) private(r)
  for (int y = 0; y < h; y++)
  {
    fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samples * 4, 100. * y / (h - 1));
    for (unsigned short x = 0, Xi[3]= {0, 0, y*y*y}; x < w; x++)
    {
      for (int sy = 0, i = (h - y - 1) * w + x; sy < 2; sy++)
      {
        for (int sx = 0; sx < 2; sx++, r = Vector())
        {
          for (int s = 0; s < samples; s++)
          {
            double r1 = 2 * erand48(Xi), dx = r1 < 1 ? sqrt(r1) - 1 : 1 - sqrt(2 - r1);
            double r2 = 2 * erand48(Xi), dy = r2 < 1 ? sqrt(r2) - 1 : 1 - sqrt(2 - r2);
            r.x += r1;
            r.y += r2;
          }

          c[i].x += clamp(r.x) / 4;
          c[i].y += clamp(r.y) / 4;
        }
      }
    }
  }

  FILE *f = fopen("image.ppm", "w");         // Write image to PPM file.
  fprintf(f, "P3\n%d %d\n%d\n", w, h, 255);
  for (int i=0; i<w*h; i++)
    fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z));
}

This is the output obtained from the sample program above:

$ g++ test.cpp
$ ./a
Rendering (4 spp) 100.00%

$ g++ test.cpp -fopenmp
$ ./a
Rendering (4 spp) 100.00%spp)  spp)   00..0052%%


fprintf is not guarded by a critical section or a #pragma omp single/master. I wouldn't be surprised if on Windows this thing messes up the console.

0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜