开发者

How can I change a __device__ variable from __global__ and __device__ functions in CUDA?

The program is as follows:

#include <cuda.h> 
#include <cutil_math>  
#include <cuda_runtime.h>  
#include <iostream>

// Plain aggregate used as the payload of the example: main() copies one
// instance to the device and back with cudaMemcpy, and the kernel test()
// writes its y field.
struct testtype  
{
    float x;  
    int y;  
    char z;  
};

// Pointer variable declared with the __device__ specifier; the kernel test()
// dereferences it to reach the testtype instance it modifies.
 __device__   testtype* gpu_config;

// Kernel from the question: stores the literal 3.0 (converted to int on
// assignment) into the y field of the struct that the __device__ pointer
// gpu_config points to. It takes no arguments and does not use thread or
// block indices, so every launched thread performs the same store.
__global__
void test()
{
    testtype* const target = gpu_config;
    target->y = 3.0;
}

// Driver from the question. It fills a host-side testtype, tries to allocate
// device storage through gpu_config, copies the struct over, launches test()
// with one block of one thread, copies the struct back into val and prints
// val.y. This is the code whose behaviour the question asks about, so it is
// kept exactly as posted.
int main(void)  
{  
testtype cpu_config;

// Host-side source values (2.0f is converted to int when stored in y).
cpu_config.x = 1;
cpu_config.y = 2.0f;
cpu_config.z = 'A';

// Receives the struct copied back from the device.
testtype val ;

// NOTE(review): gpu_config is declared __device__, yet its address is taken
// here in host code. The pointer value written by cudaMalloc is therefore
// presumably stored on the host side only and never reaches the variable the
// kernel reads -- confirm against the CUDA programming guide. The usual
// pattern is to cudaMalloc into an ordinary host pointer and publish it to
// the __device__ symbol with cudaMemcpyToSymbol.
if (cudaMalloc((void**) &gpu_config, sizeof(testtype)) != cudaSuccess)
{
    return -1;
}

// NOTE(review): the return value of this copy is not checked.
cudaMemcpy(gpu_config, &cpu_config, sizeof(testtype), cudaMemcpyHostToDevice);

// One block, one thread, no dynamic shared memory. No launch or execution
// error is queried afterwards, so a kernel failure would go unnoticed here.
test<<<1,1,0>>>();

// NOTE(review): the return value of this copy is not checked either.
cudaMemcpy(&val, gpu_config, sizeof(testtype), cudaMemcpyDeviceToHost);

std::cout << val.y << std::endl;
}   

When I delete the line test<<<1,1,0>>>();, val ends up with the same contents as gpu_config. But when test<<<1,1,0>>>(); is present, val.y is not equal to 3.0, which means that the global function test does not change the value of val. I want to know how to change the value of a __device__ variable through global functions.


#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cutil_math.h>
#include <cuda_runtime.h>

// Check the result of a CUDA runtime call.
//
// Evaluates `call` exactly once. If the result is not cudaSuccess, prints
// "<file>(<line>) : <error string>." to stderr and terminates the process
// with EXIT_FAILURE.
//
// The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement and stays correct inside an unbraced if/else. The argument is
// parenthesized so any expression can be passed, and the local variable has a
// macro-specific name so it cannot shadow a variable used in `call`.
// Usage: cudaSafeCall(cudaMalloc(&ptr, bytes));
#define cudaSafeCall(call) do {                                        \
  cudaError_t cudaSafeCall_err_ = (call);                              \
  if (cudaSuccess != cudaSafeCall_err_) {                              \
    fprintf(stderr, "%s(%i) : %s.\n", __FILE__, __LINE__,              \
            cudaGetErrorString(cudaSafeCall_err_));                    \
    exit(EXIT_FAILURE);                                                \
  }                                                                    \
} while (0)

// Check for a pending CUDA error, typically right after a kernel launch.
//
// Calls cudaGetLastError(), which returns and clears the last recorded error.
// If it is not cudaSuccess, prints
// "<file>(<line>) : <errorMessage> : <error string>." to stderr and terminates
// the process with EXIT_FAILURE. `errorMessage` must be usable as a %s
// argument (a C string).
//
// The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement and stays correct inside an unbraced if/else; the local variable
// has a macro-specific name so it cannot shadow a caller's variable.
// Usage: kernel<<<grid, block>>>(); cudaCheckErr("kernel error");
#define cudaCheckErr(errorMessage) do {                                \
  cudaError_t cudaCheckErr_err_ = cudaGetLastError();                  \
  if (cudaSuccess != cudaCheckErr_err_) {                              \
    fprintf(stderr, "%s(%i) : %s : %s.\n", __FILE__, __LINE__,         \
            (errorMessage), cudaGetErrorString(cudaCheckErr_err_));    \
    exit(EXIT_FAILURE);                                                \
  }                                                                    \
} while (0)

// Element type of the example: a struct with a single int field m, which the
// kernel sets and main() prints.
struct g{
  int m;
};
// Pointer variable declared with the __device__ specifier. main() fills it
// with the address returned by cudaMalloc via cudaMemcpyToSymbol, and the
// kernel indexes through it.
__device__ struct g *d; // device (global)

// Each thread computes its flat 1D global index from blockIdx.x, blockDim.x
// and threadIdx.x and stores 10 into the m field of the matching element of
// the array that the __device__ pointer d points to.
//
// There is no bounds check: the caller must ensure d addresses at least
// gridDim.x * blockDim.x elements before launching.
__global__ void kernel()
{
  const int idx = threadIdx.x + blockDim.x * blockIdx.x;
  d[idx].m = 10;
}

// Answer's driver: shows how a kernel can write through a __device__ pointer.
//
// Steps:
//   1. allocate one struct g on the device through an ordinary host pointer,
//   2. publish that address to the __device__ symbol d with
//      cudaMemcpyToSymbol,
//   3. launch kernel<<<1,1>>>, which writes d[0].m,
//   4. copy the element back through the host-held pointer and print it.
//
// Every runtime call goes through cudaSafeCall, so any CUDA failure ends the
// process with EXIT_FAILURE. Returns 0 on success.
int main()
{
  const size_t size  = 1 * sizeof(struct g);   // one element
  const size_t sizep = 1 * sizeof(struct g*);  // size of the pointer itself
  struct g *ld = NULL; // device (local): host-side copy of the device address

  cudaSafeCall(cudaMalloc(&ld, size));
  // Copy the pointer VALUE into the device symbol; the kernel reads d, not ld.
  cudaSafeCall(cudaMemcpyToSymbol(d,&ld,sizep));

  kernel<<<1,1>>>();
  // Launch-configuration errors are reported by cudaGetLastError(), so check
  // right after the launch; errors raised while the kernel runs surface at
  // the synchronize below.
  cudaCheckErr("kernel error");
  cudaSafeCall(cudaDeviceSynchronize());

  struct g *h = (struct g*)malloc(size);
  if(h==NULL){
    fprintf(stderr, "%s(%i) : malloc error.\n", __FILE__, __LINE__);
    (void)cudaFree(ld); // best-effort cleanup; the process is exiting anyway
    exit(EXIT_FAILURE);
  }

  // Reading the pointer back with cudaMemcpyFromSymbol(&ld,d,sizep) is not
  // necessary: ld still holds the same device address.
  cudaSafeCall(cudaMemcpy(h, ld, size, cudaMemcpyDeviceToHost));
  printf("Result: %d\n",h[0].m);

  // Release host and device allocations (the original leaked both).
  free(h);
  cudaSafeCall(cudaFree(ld));
  return 0;
}
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜