Hi, I am very new to OpenCL. I am looking for sample OpenCL host code that sets up indirect addressing. Specifically, I am trying to implement sparse matrix-vector multiplication (‘SPMV’).
My issue is that I do not get the right values when I read the vector ‘d_x’. Each element of ‘d_x’ is read from a location given by an index stored in ‘d_idx’, i.e. through indirect addressing.
Can anybody please provide sample host code that uses indirect addressing?
kernel code:
#define USE_KAHAN_IN_SPMV 0

/*
 * Sparse matrix-vector product for a matrix stored in CSR form.
 * For every row i in [0, rows) the kernel computes
 *
 *     d_y[i] += alpha * sum_j( d_val[j] * d_x[d_idx[j] - 1] )
 *
 * where j runs over [d_ptr[i] - 1, d_ptr[i + 1] - 1).
 *
 * rows   number of rows to process
 * d_ptr  row pointers; entries i and i + 1 are read for each row, so the
 *        array must hold rows + 1 ints
 * d_idx  column index of each stored value (one entry per stored value)
 * d_val  stored values (one entry per stored value)
 * d_x    input vector, addressed indirectly through d_idx
 * d_y    result vector; it is accumulated into (read, then written), so the
 *        host buffer must be readable by the kernel and must already hold
 *        the values to add to
 * alpha  scale factor applied to each row sum
 *
 * The code subtracts 1 from every d_ptr and d_idx entry before using it as
 * an offset, so both arrays must be 1-based. Passing 0-based arrays makes
 * the kernel read the wrong elements of d_val and d_x.
 *
 * Set USE_KAHAN_IN_SPMV to 1 to accumulate each row with compensated
 * (Kahan) summation instead of a plain running sum.
 */
__kernel void kernel_sspmv_csr(const int rows,
                               __global const int *d_ptr,
                               __global const int *d_idx,
                               __global const float *d_val,
                               __global const float *d_x,
                               __global float *d_y,
                               const float alpha)
{
    /* Starting point for this block: group id times work-group size. */
    int ctaStart = get_group_id(0) * get_local_size(0);
    /* Total number of work-items in the kernel launch. */
    int totalThreads = get_global_size(0);
    /* Index of the current work-item inside its work-group. */
    int tx = get_local_id(0);

    /* Each work-item handles rows i, i + totalThreads, i + 2*totalThreads, ... */
    for (int i = ctaStart + tx; i < rows; i += totalThreads) {
        /* Beginning and end (exclusive) of the row processed by this
         * work-item, converted from 1-based to 0-based offsets. */
        int iRowBeg = d_ptr[i] - 1;
        int iRowEnd = d_ptr[i + 1] - 1;

        float sum = 0.0f;
#if USE_KAHAN_IN_SPMV
        /* Start from zero rather than from the first product so that an
         * empty row (iRowBeg == iRowEnd) reads nothing. For non-empty rows
         * the first iteration yields sum = first product and c = 0, which
         * is the same state as seeding sum with the first product. */
        float c = 0.0f;
        for (int j = iRowBeg; j < iRowEnd; j++) {
            float y = d_val[j] * d_x[d_idx[j] - 1] - c;
            float t = sum + y;
            c = (t - sum) - y;
            sum = t;
        }
#else
        for (int j = iRowBeg; j < iRowEnd; j++) {
            sum += d_val[j] * d_x[d_idx[j] - 1];
        }
#endif
        d_y[i] += alpha * sum;
    }
}
Partial host code:
d_val_buf[i] = clCreateBuffer(context, CL_MEM_READ_ONLY ,
rows_per_device[i] * sizeof(float), NULL, &status); //rows_per_device[i] = rows
d_idx_buf[i] = clCreateBuffer(context, CL_MEM_READ_ONLY ,
rows_per_device[i] * sizeof(int), NULL, &status);
d_x_buf[i] = clCreateBuffer(context, CL_MEM_READ_ONLY,
rows * sizeof(float), NULL, &status);
d_ptr_buf[i] = clCreateBuffer(context, CL_MEM_READ_ONLY, // nnz number of total NZ+1
nnz * sizeof(int), NULL, &status);
output_buf[i] = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
rows_per_device[i] * sizeof(float), NULL, &status);