How can a __device__ variable be modified from __global__ and __device__ functions in CUDA?
The code is as follows:
#include <cuda.h>
#include <cutil_math>
#include <cuda_runtime.h>
#include <iostream>
// Small plain-data record that main() copies between host and device memory.
struct testtype
{
    float x;  // floating-point field
    int   y;  // integer field; the member written by the test() kernel
    char  z;  // single-character field
};
// Device-side global pointer to a testtype; dereferenced by the test() kernel.
__device__ testtype *gpu_config;
// Kernel: stores 3 into the y field of the testtype that gpu_config points to.
// Takes no arguments; gpu_config is dereferenced unconditionally, so it must
// hold a valid device address before the launch.
__global__ void test()
{
    gpu_config->y = 3;  // y is an int, so an integer literal is stored
}
// Host entry point of the question's reproduction: fills a testtype on the
// host, copies it to the device, launches test(), copies the struct back and
// prints its y field. Returns -1 if the device allocation fails.
int main(void)
{
testtype cpu_config;
cpu_config.x = 1;
// y is declared int, so this float literal is converted to the integer 2.
cpu_config.y = 2.0f;
cpu_config.z = 'A';
// Destination of the device-to-host copy below; not initialized here.
testtype val ;
// NOTE(review): gpu_config is declared __device__, yet its address is taken
// in host code here and its value is read by the host in the cudaMemcpy calls
// below. The CUDA documentation describes cudaMemcpyToSymbol /
// cudaGetSymbolAddress as the host-side way to reach a __device__ variable,
// so the copy of gpu_config that test() dereferences is presumably never set
// by this call -- confirm; this looks like the cause of the reported problem.
if (cudaMalloc((void**) &gpu_config, sizeof(testtype)) != cudaSuccess)
{
return -1;
}
// The return value of this copy is not checked.
cudaMemcpy(gpu_config, &cpu_config, sizeof(testtype), cudaMemcpyHostToDevice);
// Launch with 1 block of 1 thread and 0 bytes of dynamic shared memory.
// No cudaGetLastError()/synchronize check follows, so a failing kernel is
// not reported at this point.
test<<<1,1,0>>>();
// The return value is not checked either; if this copy fails, val keeps its
// uninitialized contents and the value printed below is meaningless.
cudaMemcpy(&val, gpu_config, sizeof(testtype), cudaMemcpyDeviceToHost);
std::cout << val.y << std::endl;
// NOTE(review): the cudaMalloc allocation is never released with cudaFree.
}
When I delete `test<<<1,1,0>>>();`, val ends up equal to the data that was copied to gpu_config. But when `test<<<1,1,0>>>();` is present, val.y is not equal to 3, which means that the __global__ function test did not change the value that is copied back into val. I want to know how to change the value of a __device__ variable from a __global__ function.
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cutil_math.h>
#include <cuda_runtime.h>
// Check the result of a CUDA runtime API call.
// Evaluates `call` exactly once; if the result is not cudaSuccess, prints
// "<file>(<line>) : <cudaGetErrorString text>." to stderr and terminates the
// process with EXIT_FAILURE.
// The body is wrapped in do { } while (0) so that `cudaSafeCall(x);` is a
// single statement and stays valid as the body of an unbraced if/else.
#define cudaSafeCall(call) do { \
    cudaError_t safeCallErr_ = (call); \
    if (cudaSuccess != safeCallErr_) { \
        fprintf(stderr, "%s(%i) : %s.\n", __FILE__, __LINE__, cudaGetErrorString(safeCallErr_)); \
        exit(EXIT_FAILURE); \
    } \
} while (0)
// Check for a pending CUDA error, typically right after a kernel launch.
// Reads (and thereby clears) the last error with cudaGetLastError(); if it is
// not cudaSuccess, prints "<file>(<line>) : <errorMessage> : <error text>."
// to stderr and terminates the process with EXIT_FAILURE.
// `errorMessage` must be usable as a printf "%s" argument (a C string).
// The body is wrapped in do { } while (0) so that `cudaCheckErr(msg);` is a
// single statement and stays valid as the body of an unbraced if/else.
#define cudaCheckErr(errorMessage) do { \
    cudaError_t checkErr_ = cudaGetLastError(); \
    if (cudaSuccess != checkErr_) { \
        fprintf(stderr, "%s(%i) : %s : %s.\n", __FILE__, __LINE__, (errorMessage), cudaGetErrorString(checkErr_)); \
        exit(EXIT_FAILURE); \
    } \
} while (0)
// Element type of the device array reached through the pointer `d`.
struct g
{
    int m;  // value written by kernel()
};
// Device-side global pointer to an array of g; main() stores a device address
// into it with cudaMemcpyToSymbol and kernel() indexes through it.
__device__ g *d;
// Kernel: each thread writes 10 into the m field of its own element of d.
// The element index is the thread's flat 1D global index
// (blockIdx.x * blockDim.x + threadIdx.x). There is no bounds check, so d must
// point to at least gridDim.x * blockDim.x elements.
__global__ void kernel()
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    d[idx].m = 10;
}
// Host driver: allocates one g on the device, publishes its address through
// the __device__ pointer d, runs kernel() on it, copies the element back and
// prints the value the kernel wrote. Any CUDA or malloc failure terminates
// the process with EXIT_FAILURE; returns 0 on success.
int main()
{
    size_t size = 1 * sizeof(struct g);    // bytes in the device array (one element)
    size_t sizep = 1 * sizeof(struct g*);  // bytes in the pointer stored in d

    struct g *ld = NULL;  // device (local): host-side copy of the device address
    cudaSafeCall(cudaMalloc(&ld, size));
    // d is a __device__ symbol, so the address is copied into it with
    // cudaMemcpyToSymbol instead of being assigned from host code.
    cudaSafeCall(cudaMemcpyToSymbol(d, &ld, sizep));

    kernel<<<1,1>>>();
    // Check right after the launch so launch-configuration errors are reported
    // first; the synchronize then surfaces errors raised while the kernel ran.
    cudaCheckErr("kernel error");
    cudaSafeCall(cudaDeviceSynchronize());

    struct g *h = (struct g*)malloc(size);
    if (h == NULL) {
        fprintf(stderr, "%s(%i) : malloc error.\n", __FILE__, __LINE__);
        cudaFree(ld);  // best-effort release before exiting
        exit(EXIT_FAILURE);
    }
    //cudaSafeCall(cudaMemcpyFromSymbol(&ld,d,sizep)); // not necessary: ld already holds the address
    cudaSafeCall(cudaMemcpy(h, ld, size, cudaMemcpyDeviceToHost));
    printf("Result: %d\n", h[0].m);

    // Release the host buffer and the device allocation.
    free(h);
    cudaSafeCall(cudaFree(ld));
    return 0;
}
精彩评论