Hello again,
I have the following code:
#include <oclUtils.h>
#include <stdio.h>
#include "Timer.h"
int main()
{
cl_platform_id platform;
size_t szGlobalWorkSize;
size_t szLocalWorkSize=64;
szGlobalWorkSize = 64;
printf("Global Work Size = %d
",(int)szGlobalWorkSize);
cl_long message = 0x0123456789abcdef;
cl_long key = 0x133457799BBCDFF1;
cl_long encrypted_message=0x0;
Timer t1;
cl_int err = oclGetPlatformID(&platform);
if(err != CL_SUCCESS)
printf("O eroare la citirea platformei= %s
",oclErrorString(err));
cl_device_id gpuDevice;
err = clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&gpuDevice,NULL);
if(err != CL_SUCCESS)
printf("O eroare la conectarea la dispozitivul de calcul= %s
",oclErrorString(err));
char cBuffer[1024];
err = clGetDeviceInfo(gpuDevice,CL_DEVICE_MAX_WORK_ITEM_SIZES,sizeof(cBuffer),&cBuffer,NULL);
if(err != CL_SUCCESS)
printf("O eroare la interogarea dispozitivului de calcul= %s
",oclErrorString(err));
printf("Max items: %s
",cBuffer);
cl_context gpuContext;
gpuContext = clCreateContext(0,1,&gpuDevice,NULL,NULL,&err);
if(err != CL_SUCCESS)
printf("O eroare la crearea contextului= %s
",oclErrorString(err));
cl_command_queue gpuCommandQueue;
gpuCommandQueue = clCreateCommandQueue(gpuContext,gpuDevice,0,&err);
if(err != CL_SUCCESS)
printf("O eroare la crearea cozii de comenzi= %s
",oclErrorString(err));
cl_program program;
size_t kernelLength ;
const char *kernelStr =oclLoadProgSource("kernel.cl","",&kernelLength);
program = clCreateProgramWithSource(gpuContext,1,&kernelStr,&kernelLength,&err);
err = clBuildProgram(program,0,NULL,NULL,NULL,NULL);
if(err != CL_SUCCESS)
printf("O eroare la compilarea kernelului= %s
",oclErrorString(err));
size_t retval;
clGetProgramBuildInfo(program,gpuDevice,CL_PROGRAM_BUILD_LOG,0,NULL,&retval);
char * buid_log = new char[retval+1];
clGetProgramBuildInfo(program,gpuDevice,CL_PROGRAM_BUILD_LOG,retval,buid_log,NULL);
buid_log[retval]='\0';
printf("%s
",buid_log);
cl_kernel kernel = clCreateKernel(program,"encrypt",&err);
cl_mem buf_a = clCreateBuffer(gpuContext,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(cl_long), &message,&err);
cl_mem buf_b = clCreateBuffer(gpuContext,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(cl_long), &key,&err);
cl_mem buf_c = clCreateBuffer(gpuContext,CL_MEM_WRITE_ONLY,sizeof(cl_long),&encrypted_message,&err);
err = clSetKernelArg(kernel,0,sizeof(cl_mem),(void *)&buf_a);
err = clSetKernelArg(kernel,1,sizeof(cl_mem),(void *)&buf_b);
err = clSetKernelArg(kernel,2,sizeof(cl_mem),(void *)&buf_c);
clFlush(gpuCommandQueue);
t1.start();
err = clEnqueueNDRangeKernel(gpuCommandQueue,kernel,1,NULL,&szGlobalWorkSize,&szLocalWorkSize,0,0,0);
clEnqueueReadBuffer(gpuCommandQueue,buf_c,CL_TRUE,0, sizeof(cl_long),&encrypted_message,0,0,0);
printf("Time: %f
",t1.getElapsedTimeInMilliSec());
printf("%ll
",encrypted_message);
clReleaseMemObject(buf_a);
clReleaseMemObject(buf_b);
}
and the kernel code is
unsigned long permute_pc1(unsigned long src,int tid){
unsigned long dst = 0;
int srcPos = 0;
unsigned char permutation[] = {
57, 49, 41, 33, 25, 17, 9,
1, 58, 50, 42, 34, 26, 18,
10, 2, 59, 51, 43, 35, 27,
19, 11, 3, 60, 52, 44, 36,
63, 55, 47, 39, 31, 23, 15,
7, 62, 54, 46, 38, 30, 22,
14, 6, 61, 53, 45, 37, 29,
21, 13, 5, 28, 20, 12, 4
};
if(tid<56)
{
srcPos = 64 - permutation[tid];
dst = (src>>srcPos & 0x01)<<(55-tid);
}
return dst;
}
__kernel void encrypt(__global unsigned long *message, __global unsigned long *key, __global unsigned long *encrypted)
{
unsigned int tid = get_global_id(0);
__local unsigned long cheie;
cheie = key[0];
unsigned long bit = permute_pc1(cheie,tid);
__local unsigned long result[64];
result[tid] = 0;
result[tid] = bit;
encrypted[0] = encrypted[0]|result[tid];
}
I want to do a simple parallel permutation according to the permutation matrix. All things go accordingly to purpose but when I do
encrypted[0] = encrypted[0]|result[tid];
encrypted is 0;
but if I do
if(tid==0)//or another value
encrypted[0] = encrypted[0]|result[tid];
encrypted has the designated bit set or unset according to the test values
Any hint on how to resolve this problem would be greatly appreciated!