Ich bin neu in der OpenCL-Programmierung und versuche, das folgende „Hallo Welt“-artige OpenCL-Programm in C zu erstellen. Titel der Frage: Unbekannter Fehler in OpenCL-Programm
Kernel-Code (hello.cl)
/*
 * hello - element-wise square.
 *
 * Each work-item writes input[gid] * input[gid] to output[gid], where gid is
 * its global id in dimension 0.
 *
 * input:  buffer the values are read from
 * output: buffer the squared values are written to
 * count:  number of elements to process; work-items whose global id is
 *         >= count do nothing, so the host may launch more work-items than
 *         there are elements.
 */
__kernel void hello(__global int* input, __global int* output, const unsigned int count)
{
    /*
     * get_global_id() returns size_t. Keep it in that type instead of
     * narrowing it to a signed int, so the bounds check below is an exact
     * unsigned comparison for every possible global id.
     */
    size_t gid = get_global_id(0);

    if (gid < count) {
        output[gid] = input[gid] * input[gid];
    }
}
Host-Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define DATA_SIZE (10)
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define MAX_SOURCE_SIZE (0x100000)
int main()
{
size_t count = DATA_SIZE;
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem memobj = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
size_t global; // local domain size for our calculation
size_t local; // local domain size for our calculation
FILE *fp;
char fileName[] = "./hello.cl";
char *source_str;
size_t source_size;
/* Load the source code containing the kernel*/
fp = fopen(fileName, "r");
if (!fp)
{
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);
/* Get Platform and Device Info */
ret = clGetPlatformIDs (1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs (platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
if (ret != CL_SUCCESS)
{
printf("Error: Failed to create a device group!\n");
return EXIT_FAILURE;
}
/* Create OpenCL context */
context = clCreateContext (NULL, 1, &device_id, NULL, NULL, &ret);
if (!context)
{
printf("Error: Failed to create a compute context!\n");
return EXIT_FAILURE;
}
/* Create Command Queue */
command_queue = clCreateCommandQueue (context, device_id, 0, &ret);
if (!command_queue )
{
printf("Error: Failed to create a command commands!\n");
return EXIT_FAILURE;
}
/* Create Kernel Program from the source */
program = clCreateProgramWithSource (context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
if (!program)
{
printf("Error: Failed to create compute program!\n");
return EXIT_FAILURE;
}
/* Build Kernel Program */
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
if (ret != CL_SUCCESS)
{
size_t len;
char buffer[2048];
printf("Error: Failed to build program executable!\n");
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
printf("%s\n", buffer);
exit(1);
}
/* Create OpenCL Kernel */
kernel = clCreateKernel(program, "hello", &ret);
if (!kernel || ret != CL_SUCCESS)
{
printf("Error: Failed to create compute kernel!\n");
exit(1);
}
int data[DATA_SIZE]; // original data set given to device
int results[DATA_SIZE]; // results returned from device
int i = 0;
for(i = 0; i < count; i++)
data[i] = i+1;
cl_mem input; // device memory used for the input array
cl_mem output; // device memory used for the output array
input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * count, NULL, NULL);
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * count, NULL, NULL);
if (!input || !output)
{
printf("Error: Failed to allocate device memory!\n");
exit(1);
}
ret = clEnqueueWriteBuffer(command_queue, input, CL_TRUE, 0, sizeof(int) * count, data, 0, NULL, NULL);
if (ret != CL_SUCCESS)
{
printf("Error: Failed to write to source array!\n");
exit(1);
}
/* Set OpenCL Kernel Parameters */
ret = 0;
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
ret |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
if (ret != CL_SUCCESS)
{
printf("Error: Failed to set kernel arguments! %d\n", ret);
exit(1);
}
/* Execute OpenCL Kernel */
ret = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
if (ret != CL_SUCCESS)
{
printf("Error: ! %d\n", ret);
scanf("%d",&global);
exit(1);
}
global = (size_t) count;
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
clFinish(command_queue);
/* Copy results from the memory buffer */
ret = clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sizeof(int) * count, results, 0, NULL, NULL);
if (ret != CL_SUCCESS)
{
printf("Error: Failed to read output array! %d\n", ret);
exit(1);
}
// Validate our results
//
int correct = 0;
for(i = 0; i < count; i++)
{
printf("%d-%d\n",data[i],results[i]);
if(results[i] == data[i] * data[i])
correct++;
}
// Print a brief summary detailing the results
//
printf("Computed '%d/%d' correct values!\n", correct, count);
scanf("%d",&ret);
/* Finalization */
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(memobj);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
clReleaseMemObject(input);
clReleaseMemObject(output);
free(source_str);
return 0;
}
Dieser Code wirkt sehr einfach, das Ergebnis-Array enthält jedoch nur Nullwerte. Kann mir jemand sagen, welchen Fehler ich hier mache? Die Ausgabe des Programms ist wie folgt:
1-0
2-0
3-0
4-0
5-0
6-0
7-0
8-0
9-0
10-0
Computed '0/10' correct values!
Ich bin nicht in der Lage, einen Grund dafür zu finden, dass die GPU nicht die richtigen Werte berechnet.
Und was genau ist der Fehler? Können Sie ihn eingrenzen? Ist es ein Buildfehler? Ein Laufzeitfehler? Ein Fehler beim Laden des Kernels? Bitte erläutern. Und bitte [lesen Sie, wie man gute Fragen stellt](http://stackoverflow.com/help/how-to-ask). –
@Joachim Pileborg: Ich habe die Ausgabe hinzugefügt. Hoffe, das wird helfen. –
Sie überprüfen nicht alle Rückgabewerte der Aufrufe. – DarkZeros