problems with memories

Hi, I don’t know what I’m doing wrong. I want to calculate the sum of many values in an array, but when I put a statement such as “sum = sum + 4;” inside a for loop, it locks up my computer and I have to reboot the system.

this is my code now:

// OpenCL C source for the `calcpi` kernel, handed to the OpenCL runtime as a
// single NUL-terminated string.
//
// Each work-item i accumulates 4 / (2k + 1) for k in [i*n, i + n) with n = 100
// and stores the partial sum in output[i] when i < count. `input` is accepted
// but never read.
//
// The loop bounds are computed in 64-bit `long`. With 32-bit `int`, i * n
// wraps negative once i exceeds about 21.4 million (INT_MAX / 100); the loop
// then runs roughly two billion iterations per work-item, which is the likely
// cause of the reported lock-up on large global sizes.
//
// NOTE(review): the upper bound i + n means only work-items 0 and 1 execute
// any iterations. A block of n terms per work-item would need (i + 1) * n --
// confirm the intent before changing it, together with how the host combines
// the partial sums.
const char *KernelSource =
    "\n"
    "__kernel void calcpi(\n"
    "    __global float *input,\n"
    "    __global float *output,\n"
    "    const unsigned int count)\n"
    "{\n"
    "    const int i = get_global_id(0);\n"
    "    const int n = 100;\n"
    "    const long first = (long)i * n;\n"
    "    const long limit = (long)i + n;\n"
    "    float sum = 0.0f;\n"
    "    for (long k = first; k < limit; k++) {\n"
    "        const float z = (float)k * 2 + 1;\n"
    "        sum = sum + 4.0f / z;\n"
    "    }\n"
    "    if (i < count)\n"
    "        output[i] = sum;\n"
    "}\n"
    "\n";

what i’m doing wrong? anyone can help me?
Thank you

.

dot is a good answer =)

might need to paste your relevant host code too.

Here is all of my code:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <OpenCL/opencl.h>
////////////////////////////////////////////////////////////////////////////////
// Number of elements in the host results array and in the device output
// buffer; main() also uses it as the global work size, so one work-item is
// launched per element.
// Use a static data size for simplicity
//
#define DATA_SIZE (110000000)
////////////////////////////////////////////////////////////////////////////////
// Element type of the host-side buffers (data, results, pi). The kernel source
// hard-codes `float`, so changing TIPO alone would make the host and device
// buffer layouts disagree.
#define TIPO float
// Scale factor: main() divides the accumulated sum by MULTI before printing.
// The kernel source string multiplies every term by what appears to be the
// same literal, so the two have to be edited together.
#define MULTI (100000000000000000000000000000000000.0f)
// OpenCL C source for the `calcpi` kernel, compiled at run time by main().
//
// Each work-item i accumulates 4 * S / (2k + 1) for k in [i*n, i + n), where
// n = 100 and S is the scale literal written in the source below, then stores
// the partial sum in output[i] when i < count. `input` is accepted but never
// read.
//
// The loop bounds are computed in 64-bit `long`. With 32-bit `int`, i * n
// wraps negative once i exceeds about 21.4 million (INT_MAX / 100); the loop
// then runs roughly two billion iterations per work-item, which is the likely
// cause of the reported lock-up on large global sizes.
//
// NOTE(review): the upper bound i + n means only work-items 0 and 1 execute
// any iterations. A block of n terms per work-item would need (i + 1) * n --
// confirm the intent before changing it, together with the sign handling in
// the host-side accumulation.
const char *KernelSource =
    "\n"
    "__kernel void calcpi(\n"
    "    __global float *input,\n"
    "    __global float *output,\n"
    "    const unsigned int count)\n"
    "{\n"
    "    const int i = get_global_id(0);\n"
    "    const int n = 100;\n"
    "    const long first = (long)i * n;\n"
    "    const long limit = (long)i + n;\n"
    "    float sum = 0.0f;\n"
    "    for (long k = first; k < limit; k++) {\n"
    "        const float z = (float)k * 2 + 1;\n"
    "        sum = sum + 4.0f * 100000000000000000000000000000000000.0f / z;\n"
    "    }\n"
    "    if (i < count)\n"
    "        output[i] = sum;\n"
    "}\n"
    "\n";

////////////////////////////////////////////////////////////////////////////////

int main(int argc, char** argv)
{
int err; // error code returned from api calls
//printf("%d",sizeof(TIPO));
//scanf("%d",&err);
TIPO data[2]; // original data set given to device
TIPO *results = malloc(sizeof(TIPO)*DATA_SIZE); // results returned from device
//unsigned int correct; // number of correct results returned
//printf(“TESTE”);

size_t global;                    // global domain size for our calculation
size_t local;                     // local domain size for our calculation

cl_device_id device_id;           // device ID
cl_context context;               // context
cl_command_queue queue;           // command queue
cl_program program;               // program
cl_kernel kernel;                 // kernel

cl_mem input;                     // device memory used for the input array
cl_mem output;                    // device memory used for the output array

// Get data on which to operate
//

//int i = 0;
//int n = 3;
unsigned long int count = DATA_SIZE;
//for(i = 0; i &lt; count; i+=2) {
    //data[i] = n;
	//n += 2;
//}
//printf("TESTE");
// Get an ID for the device                                    [2]
int gpu = 1;
err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1,&device_id, NULL);
if (err != CL_SUCCESS)
	printf("ERROR clGetDeviceIDs!

"); // [3]

// Create a context                                            [4]
//
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
if (!context) {
	printf("ERROR clCreateContext

");
}

// Create a command queue                                              [5]
//
queue = clCreateCommandQueue(context, device_id, 0, &err);
if (!queue) {
	printf("ERROR clCreateCommandQueue

");
}

// Create the compute program from the source buffer                   [6]
//
program = clCreateProgramWithSource(context, 1,(const char **) & KernelSource, NULL, &err);
if ( !program) {
	printf("ERROR clCreateProgramWithSource

");
}

// Build the program executable                                        [7]
//
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
    size_t len;
    char buffer[2048];
	
    printf("Error: Failed to build program executable

“); //[8]
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,sizeof(buffer), buffer, &len);
printf(”%s
", buffer);
exit(1);
}

// Create the compute kernel in the program we wish to run            [9]
//
kernel = clCreateKernel(program, "calcpi", &err);
if (!kernel || err != CL_SUCCESS) {
	printf("ERROR clCreateKernel OR CL_SUCESS

");
}

// Create the input and output arrays in device memory for our calculation
//                                                                   [10]
input = clCreateBuffer(context,  CL_MEM_READ_ONLY,  sizeof(TIPO)*2,NULL, NULL);
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(TIPO) *count,NULL, NULL);
if (!input || !output) {
	printf("ERROR !INPUT OR !OUTPUT

");
}

// Write our data set into the input array in device memory          [11]
//
err = clEnqueueWriteBuffer(queue, input, CL_TRUE, 0,sizeof(TIPO)*2, data, 0, NULL, NULL);
if (err != CL_SUCCESS) {
	printf("ERROR clEnqueueWriteBuffer

");
}

// Set the arguments to our compute kernel                           [12]
//
err = 0;
err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
if (err != CL_SUCCESS) {
	printf("ERROR ARGUMENTS COMPUTE KERNEL - ERROR NUMBER: %d

",err);
exit(1);
}

// Get the maximum work-group size for executing the kernel on the device
//                                                                   [13]
err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE,sizeof(size_t), &local, NULL);
if (err != CL_SUCCESS) {
	printf("ERROR MAXIMUM WORK-GROUP - ERROR NUMBER: %d

",err);
exit(1);
}

// Execute the kernel over the entire range of the data set          [14]
//
global = count;
//printf("TESTE");
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL,0, NULL, NULL);
if (err) {
	printf("ERROR EXECUTE KERNEL - ERROR NUMBER: %d

",err);
printf("ERROS: CL_INVALID_PROGRAM_EXECUTABLE %d
", CL_INVALID_PROGRAM_EXECUTABLE);
printf("ERROS: CL_INVALID_COMMAND_QUEUE %d
", CL_INVALID_COMMAND_QUEUE);
printf("ERROS: CL_INVALID_KERNEL %d
", CL_INVALID_KERNEL);
printf("ERROS: CL_INVALID_CONTEXT %d
", CL_INVALID_CONTEXT);
printf("ERROS: CL_INVALID_KERNEL_ARGS %d
", CL_INVALID_KERNEL_ARGS);
printf("ERROS: CL_INVALID_WORK_DIMENSION %d
", CL_INVALID_WORK_DIMENSION);
printf("ERROS: CL_INVALID_WORK_GROUP_SIZE %d
", CL_INVALID_WORK_GROUP_SIZE);
printf("ERROS: CL_MEM_OBJECT_ALLOCATION_FAILURE %d
", CL_MEM_OBJECT_ALLOCATION_FAILURE);
printf("ERROS: CL_INVALID_WORK_ITEM_SIZE %d
", CL_INVALID_WORK_ITEM_SIZE);
printf("ERROS: CL_INVALID_GLOBAL_OFFSET %d
", CL_INVALID_GLOBAL_OFFSET);
printf("ERROS: CL_OUT_OF_RESOURCES %d
", CL_OUT_OF_RESOURCES);
printf("ERROS: CL_INVALID_EVENT_WAIT_LIST %d
", CL_INVALID_EVENT_WAIT_LIST);
printf("ERROS: CL_OUT_OF_HOST_MEMORY %d
", CL_OUT_OF_HOST_MEMORY);
exit(1);
}

// Wait for the command queue to get serviced before reading back results
//                                                                   [15]
clFinish(queue);

// Read the results from the device                                  [16]
//
err = clEnqueueReadBuffer(queue, output, CL_TRUE, 0,sizeof(TIPO) *count, results, 0, NULL, NULL );
if (err != CL_SUCCESS) {
	printf("ERROR READ RESULTS - ERROR NUMBER: %d

",err);
printf("ERROS: CL_INVALID_PROGRAM_EXECUTABLE %d
", CL_INVALID_PROGRAM_EXECUTABLE);
printf("ERROS: CL_INVALID_COMMAND_QUEUE %d
", CL_INVALID_COMMAND_QUEUE);
printf("ERROS: CL_INVALID_KERNEL %d
", CL_INVALID_KERNEL);
printf("ERROS: CL_INVALID_CONTEXT %d
", CL_INVALID_CONTEXT);
printf("ERROS: CL_INVALID_KERNEL_ARGS %d
", CL_INVALID_KERNEL_ARGS);
printf("ERROS: CL_INVALID_WORK_DIMENSION %d
", CL_INVALID_WORK_DIMENSION);
printf("ERROS: CL_INVALID_WORK_GROUP_SIZE %d
", CL_INVALID_WORK_GROUP_SIZE);
printf("ERROS: CL_MEM_OBJECT_ALLOCATION_FAILURE %d
", CL_MEM_OBJECT_ALLOCATION_FAILURE);
printf("ERROS: CL_INVALID_WORK_ITEM_SIZE %d
", CL_INVALID_WORK_ITEM_SIZE);
printf("ERROS: CL_INVALID_GLOBAL_OFFSET %d
", CL_INVALID_GLOBAL_OFFSET);
printf("ERROS: CL_OUT_OF_RESOURCES %d
", CL_OUT_OF_RESOURCES);
printf("ERROS: CL_INVALID_EVENT_WAIT_LIST %d
", CL_INVALID_EVENT_WAIT_LIST);
printf("ERROS: CL_OUT_OF_HOST_MEMORY %d
", CL_OUT_OF_HOST_MEMORY);
exit(1);

}
//printf("TESTE");
TIPO pi = 0.0;
int i;
for (i=0;i&lt;count-1;i++) {
	//printf("%f",results[i]);
	pi += (pow(-1.0,i)) * (TIPO) results[i];
	//pi = (TIPO) results[i];
	printf("LOCAL ID: %f

“,results[i]);
//printf(”%f",(pow(-1,i)));
//pi += (pow(-1.0,i));
}
printf("VE: 3.14159265358979323846
");
printf(“PI: %1.60lf”,pi/MULTI);

// Shut down and clean up
//
clReleaseMemObject(input);
clReleaseMemObject(output);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);

scanf("%d",&i);
return 0;

}

if anyone can help me i would appreciate very much.
Thank you

Dunno if this helps, but you need to change
unsigned long int count = DATA_SIZE;
to
unsigned int count = DATA_SIZE;
to match
err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
and
" const unsigned int count)
" \

Also, the for-loop rarely runs. Do you really want "k=i*n; k<i+n; k++"? For example, for i=43 the loop starts at k=i*n=4300 but its condition is k<i+n=143, so it would never execute.

Actually it runs just 101 times?

100 times for i=0
1 time for i=1
0 times for i=2 through 110000000

Assuming the host code isn’t printing errors about allocation failures (it will need to allocate 440 million bytes, which seems a lot), then there doesn’t seem any obvious reason it should fail. Although it doesn’t seem like it will do anything useful …

Try commenting out the call to clEnqueueNDRangeKernel() to make sure the rest is executing properly without having to reboot every time.

BTW, I can run your code (with minor mods to adapt it) on my system, that is, “this locks my computer and i need reboot system” doesn’t happen on my implementation.

Hi, thank you for yours time.
I try to do what you suggested, but doesn’t work yet.
I change the type value of “long unsigned int” for “unsigned int”, try change the minor value of DATA_SIZE and commented line clEnqueueNDRangeKernel(), works fine, but nothing happen…
and bwatt: what is your gpu? mine is a “AMD Radeon HD 6750M 1024 MB”

Don’t have a GPU, using a CPU device. Sorry.

How can running with a CPU driver possibly lock the system?

This should be impossible: opencl processes are no different from any other cpu process. Unlike the GPU drivers, bugs in them can’t affect the CPU’s operation.

Unless your operating system is really broken or more likely you have faulty hardware.

notzed, these are two different users. bwatt doesn’t have a GPU at hand. thamerhatem says their system locks up.

oh ooops, didn’t notice that. ta.