Need help in OpenCL host code for indirect addressing

Hi, I am very new to OpenCL. I am looking for sample OpenCL host code for a kernel that uses indirect addressing. Specifically, I am trying to implement sparse matrix-vector multiplication (‘SpMV’).

My issue is with reading the vector ‘d_x’: I am not getting the right values. Each element of ‘d_x’ is read from a position that is itself stored in another array (‘d_idx’), i.e. through indirect addressing.

Can anybody please provide me any sample host code which has indirect addressing?

kernel code:
#define USE_KAHAN_IN_SPMV 0
/*
 * Sparse matrix-vector product accumulated into y:
 *
 *     d_y[i] += alpha * sum_j( d_val[j] * d_x[d_idx[j] - 1] )
 *
 * for every row i < rows, where j runs over the entries of row i.
 *
 * The matrix is addressed CSR-style with 1-based indices: the kernel
 * subtracts 1 from every value it reads from d_ptr and d_idx.
 *
 *   rows   number of rows processed by this launch
 *   d_ptr  row pointers; d_ptr[i] and d_ptr[i+1] are both read, so the
 *          array must hold rows + 1 entries
 *   d_idx  1-based column index of each stored entry
 *   d_val  value of each stored entry (same indexing as d_idx)
 *   d_x    input vector, indexed by (column index - 1)
 *   d_y    output vector; it is read as well as written (+=), so it must
 *          contain valid data before the launch
 *   alpha  scalar applied to each row sum
 */
__kernel void kernel_sspmv_csr(const int rows,
                               __global const int *d_ptr,
                               __global const int *d_idx,
                               __global const float *d_val,
                               __global const float *d_x,
                               __global float *d_y,
                               const float alpha)
{
    /* Starting point for this block */
    int ctaStart = get_group_id(0) * get_local_size(0);
    /* Total no. of threads in the kernel */
    int totalThreads = get_global_size(0);
    /* Get current thread */
    int tx = get_local_id(0);
    // printf("Inside csr\n");

    /* Read the data */
    for (int i = ctaStart + tx; i < rows; i += totalThreads) {
        /* Read the beginning and end of the row
         * which will be processed by this thread */
        int iRowBeg = d_ptr[i] - 1;
        int iRowEnd = d_ptr[i + 1] - 1;

#if USE_KAHAN_IN_SPMV
        /* Compensated summation. Starting from sum = 0 and looping from
         * iRowBeg gives the same result as seeding sum with the first
         * product, but does not touch d_val/d_idx when the row is empty. */
        float sum = 0.0f;
        float c = 0.0f;
        for (int j = iRowBeg; j < iRowEnd; j++) {
            float y = d_val[j] * d_x[d_idx[j] - 1] - c;
            float t = sum + y;
            c = (t - sum) - y;
            sum = t;
        }
#else
        float sum = 0.0f;
        for (int j = iRowBeg; j < iRowEnd; j++) {
            /* Indirect addressing: the position in d_x comes from d_idx. */
            sum += d_val[j] * d_x[d_idx[j] - 1];
        }
#endif
        d_y[i] += alpha * sum;
    }
}

Partial host code:

d_val_buf[i] = clCreateBuffer(context, CL_MEM_READ_ONLY ,
    rows_per_device[i] * sizeof(float), NULL, &status);   //rows_per_device[i] = rows

d_idx_buf[i] = clCreateBuffer(context, CL_MEM_READ_ONLY ,
    rows_per_device[i]  * sizeof(int), NULL, &status);

d_x_buf[i] = clCreateBuffer(context, CL_MEM_READ_ONLY,
    rows * sizeof(float), NULL, &status);

d_ptr_buf[i] = clCreateBuffer(context, CL_MEM_READ_ONLY,    // nnz number of total NZ+1
    nnz * sizeof(int), NULL, &status);

output_buf[i] = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
    rows_per_device[i] * sizeof(float), NULL, &status);