#include "hip/hip_runtime.h"  /* GPU library */
/*
 * FILENAME: gpu_hello.cpp
 *
 * Copy the string "hello, world" from CPU to GPU and back using
 * common HIP (CUDA-style) runtime calls.
 *
 * Naming convention of the GPU world:
 *   H_ - host   (CPU)
 *   D_ - device (GPU)
 */
#include <stdio.h>   /* printf(), fprintf() */
#include <stdlib.h>  /* atoi(), EXIT_SUCCESS, EXIT_FAILURE */

/* Forward Reference */
__global__ void HelloWorld(char*, char*);

/*
 * Report a failed HIP runtime call on stderr.
 *
 *   status - value returned by the HIP call being checked
 *   what   - short description of the call, used in the message
 *
 * Returns 0 when status is hipSuccess, 1 otherwise.
 */
static int hipFailed(hipError_t status, const char* what)
{
    if (status == hipSuccess) {
        return 0;
    }
    fprintf(stderr, "HIP error during %s: %s\n", what, hipGetErrorString(status));
    return 1;
}

/*
 * Make `device` the current GPU.
 *
 *   device - device ordinal passed to hipSetDevice()
 *   label  - text used to name the device in the error message
 *
 * Returns 0 on success. On failure prints the valid device range (when the
 * device count can be queried) and returns 1.
 */
static int selectDevice(int device, const char* label)
{
    /* Initialized so a failed hipGetDeviceCount() never exposes garbage. */
    int num_devices = 0;

    if (hipSetDevice(device) == hipSuccess) {
        return 0;
    }

    if (hipGetDeviceCount(&num_devices) != hipSuccess || num_devices < 1) {
        fprintf(stderr,
                "Error initializing device %s, no usable GPU devices were found\n",
                label);
    } else {
        fprintf(stderr,
                "Error initializing device %s, device value must be 0-%d\n",
                label, num_devices - 1);
    }
    return 1;
}

/*
 * Program entry point.
 *
 *   argv[1] (optional) - ordinal of the GPU to use; device 0 is used when
 *                        no argument is given.
 *
 * Returns EXIT_SUCCESS after printing the string copied back from the GPU,
 * or EXIT_FAILURE if device selection or any HIP call fails.
 */
int main(int argc, char** argv)
{
    /* 1) The host initializes an array.                 */
    /*    - define source message and target array.      */
    /*    - both arrays live on the host stack.          */
    /* The target literal has the same length as the source, so copying  */
    /* `size` bytes back into it stays in bounds.                        */
    char H_str1[] = "hello, world";
    char H_str2[] = "XXXXXXXXXXXX";
    const size_t size = sizeof(H_str1);  /* 13 bytes: 12 characters + NUL */

    char* D_str1 = NULL;
    char* D_str2 = NULL;
    int failed = 0;

    /* Set device based on input from command line. */
    if (argc > 1) {
        if (selectDevice(atoi(argv[1]), argv[1]) != 0) {
            return EXIT_FAILURE;
        }
    } else {
        fprintf(stderr, "No GPU specified, using first GPU\n");
        if (selectDevice(0, "0") != 0) {
            return EXIT_FAILURE;
        }
    }

    /* Allocate memory on the GPU device. */
    failed = hipFailed(hipMalloc((void**)&D_str1, size), "hipMalloc(D_str1)");
    if (!failed) {
        failed = hipFailed(hipMalloc((void**)&D_str2, size), "hipMalloc(D_str2)");
    }

    /* 2) Copy array from host memory to GPU memory. */
    if (!failed) {
        failed = hipFailed(hipMemcpy(D_str1, H_str1, size, hipMemcpyHostToDevice),
                           "host-to-device copy");
    }

    /* 3) GPU operates on the array.  */
    /*    - invoke the kernel.        */
    if (!failed) {
        /* Set the grid and block sizes: one block, one thread per byte. */
        /* The kernel has no bounds check, so the thread count must      */
        /* equal the buffer size exactly.                                */
        dim3 dimGrid(1);
        dim3 dimBlock((unsigned int)size);

        hipLaunchKernelGGL(HelloWorld, dimGrid, dimBlock, 0, 0, D_str1, D_str2);

        /* A launch does not return a status; query it explicitly. */
        failed = hipFailed(hipGetLastError(), "HelloWorld kernel launch");
    }

    /* 4) Copy array from GPU memory to host memory. */
    if (!failed) {
        failed = hipFailed(hipMemcpy(H_str2, D_str2, size, hipMemcpyDeviceToHost),
                           "device-to-host copy");
    }

    /* Free up the allocated memory on the GPU (on success and failure). */
    if (D_str1 != NULL) {
        hipFree(D_str1);
    }
    if (D_str2 != NULL) {
        hipFree(D_str2);
    }

    if (failed) {
        return EXIT_FAILURE;
    }

    /* Display result of the copy. */
    printf("%s\n", H_str2);
    return EXIT_SUCCESS;
}

/* Device Kernel */
/* On the GPU, perform some computation (copy).
*/
/*
 * HelloWorld: every thread copies one char from str1 to str2.
 *
 *   str1 - source buffer, read at this thread's global index
 *   str2 - destination buffer, written at the same index
 *
 * The index is derived from the x dimension of the launch only, and there
 * is no bounds check: both buffers must hold at least
 * gridDim.x * blockDim.x elements.
 */
__global__ void HelloWorld(char* str1, char* str2)
{
    /* Flat global index of this thread along x. */
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;

    /* Fetch this thread's character, then store it at the same offset. */
    const char ch = str1[tid];
    str2[tid] = ch;
}