/*!
* Sample kernel which multiplies every element of the input array with
* a constant and stores it at the corresponding output array
*/
#pragma OPENCL EXTENSION cl_amd_printf : enable
#define Mt 4
#define Mr 4
#define MOD_SCHEME 16
#define bitSize 4
#define STACKSIZE 100
#define initial_SC 100000.0f
#define CLIP 100000.0f
/* Complex Number Struct */
typedef struct my_comp
{
float2 data;
} comp;
inline comp mul(comp num1, comp num2)
{
comp temp;
temp.data.x = num1.data.x*num2.data.x - num1.data.y*num2.data.y;
temp.data.y = num1.data.x*num2.data.y + num1.data.y*num2.data.x;
return temp;
}
inline comp div(comp num1, comp num2)
{
comp temp;
float mag = num2.data.x*num2.data.x + num2.data.y*num2.data.y;
temp.data.x = (num1.data.x*num2.data.x + num1.data.y*num2.data.y)/mag;
temp.data.y = (num1.data.y*num2.data.x - num1.data.x*num2.data.y)/mag;
return temp;
}
inline comp add(comp num1, comp num2)
{
comp temp;
temp.data.x = num1.data.x + num2.data.x;
temp.data.y = num1.data.y + num2.data.y;
return temp;
}
inline comp sub(comp num1, comp num2)
{
comp temp;
temp.data.x = num1.data.x - num2.data.x;
temp.data.y = num1.data.y - num2.data.y;
return temp;
}
void equal(comp *num1, comp num2)
{
num1->data.x = num2.data.x;
num1->data.y = num2.data.y;
}
typedef struct _node
{
int level, index;
float ped;
}node;
typedef struct stack
{
node n[STACKSIZE];
int top;
}stack_class;
void init_stack(stack_class *s)
{
s->top = -1;
}
void push(stack_class *s, node _n)
{
s->top = s->top+1;
if(s->top == STACKSIZE)
printf("PROBLEM STACK IS FULL\n");
s->n[s->top] = _n;
}
node pop(stack_class *s)
{
if(s->top>=0)
{
node nn = s->n[s->top];
s->top = s->top - 1;
return nn;
}
else
{
node temp;
temp.index = -456;
printf("STACK IS EMPTY CANT POP\n");
return temp;
}
}
bool is_empty(stack_class *s)
{
if(s->top == -1)
return true;
return false;
}
/* Only for 16QAM */
void get_cord_val(int *qam_sym, comp *x)
{
int xVal=0, yVal=0;
float xCor, yCor;
for(int i=0;i<bitSize/2;i++)
yVal = 2*yVal + qam_sym[i];
for(int i=bitSize/2;i<bitSize;i++)
xVal = 2*xVal + qam_sym[i];
switch(xVal)
{
case 0:
xCor=-3;
break;
case 1:
xCor=-1;
break;
case 3:
xCor=1;
break;
case 2:
xCor=3;
break;
}
switch(yVal)
{
case 0:
yCor=-3;
break;
case 1:
yCor=-1;
break;
case 3:
yCor=1;
break;
case 2:
yCor=3;
break;
}
x->data.x = xCor;
x->data.y = yCor;
}
void get_symbol(comp y, comp *parent_sig, __global float *R_re, __global float *R_im, int level, int tx,
comp *cor)
{
int yRe, yIm;
int min_x, max_x, min_y, max_y, j=0, i=0;
comp y_scaled;
equal(&y_scaled, y);
comp temp, temp2;
for(int i=level+1; i<tx; ++i)
{
temp.data.x = 1.0f*parent_sig[i].data.x;
temp.data.y = 1.0f*parent_sig[i].data.y;
temp2.data.x = R_re[i];
temp2.data.y = R_im[i];
y_scaled = sub(y_scaled,mul(temp2,temp));
}
temp2.data.x = R_re[level];
temp2.data.y = R_im[level];
y_scaled = div(y_scaled, temp2);
float y_re = y_scaled.data.x, y_im = y_scaled.data.y;
if(MOD_SCHEME == 16) //16 QAM
{
if(y_re < -2.0f)
yRe = -3;
else if(y_re >= -2.0f && y_re < 0.0f)
yRe = -1;
else if(y_re >= 0.0f && y_re < 2.0f)
yRe = 1;
else
yRe = 3;
if(y_im < -2.0f)
yIm = -3;
else if(y_im >= -2.0f && y_im < 0.0f)
yIm = -1;
else if(y_im >= 0.0f && y_im < 2.0f)
yIm = 1;
else
yIm = 3;
cor[0].data.x = yRe;
cor[0].data.y = yIm;
j=1;
min_x = min_y = -3;
max_x = max_y = 3;
i = 1;
int k, flag = 0;
while(j<16)
{
//cout<<"get symbol 2\n";
min_x = yRe-2*i;
max_x = yRe+2*i;
min_y = yIm-2*i;
max_y = yIm+2*i;
int min = min_x, max=max_x, x, y=max_y;
k=min;
flag = 0;
//while box is not complete
for(unsigned int l=0;l<4;++l)
{
while(k!=max)
{
if(!flag || flag == 2)
{
if(k >= -3 && k <= 3 && y >= -3 && y<= 3)
{
cor[j].data.x = k*1.0f;
cor[j].data.y = y*1.0f;
++j;
}
}
else if (flag == 1 || flag == 3)
{
if(k >= -3 && k <= 3 && x >= -3 && x<= 3)
{
cor[j].data.x = x;
cor[j].data.y = k;
++j;
}
}
if(!flag || flag == 3)
k+= 2;
else
k-= 2;
}
if(!flag)
{
flag = 1;
min = max_y;
max = min_y;
x = max_x;
}
else if(flag == 1)
{
flag = 2;
min = max_x;
max = min_x;
y = min_y;
}
else if(flag == 2)
{
flag = 3;
min = min_y;
max = max_y;
x = min_x;
}
k = min;
}
++i;
}
}
}
float getPed(comp y, __global float *R_re, __global float *R_im, comp *parent_sig, comp sym, int curr_level, float ped_parent)
{
comp b, e;
b.data.x = 0;
b.data.y = 0;
e.data.x = 0;
e.data.y = 0;
comp sig, temp2;
for(int i=curr_level+1; i<Mt; ++i)
{
sig.data.x = 1.0f*parent_sig[i].data.x;
sig.data.y = 1.0f*parent_sig[i].data.y;
temp2.data.x = R_re[i];
temp2.data.y = R_im[i];
b = add(b, mul(temp2,sig));
}
b = sub(y,b);
temp2.data.x = R_re[curr_level];
temp2.data.y = R_im[curr_level];
sig = mul(temp2,sym);
e = sub(b,sig);
float val = ped_parent + (e.data.x*e.data.x + e.data.y*e.data.y);
return val;
}
void get_bits(comp qam_sym, int *bits)
{
if(MOD_SCHEME == 16)
{
if(qam_sym.data.y == -3)
{
bits[0] = bits[1] = 0;
}
else if(qam_sym.data.y == -1)
{
//bits[0] = 1; bits[1] = 0;
bits[0] = 0; bits[1] = 1;
}
else if(qam_sym.data.y == 1)
{
bits[0] = bits[1] = 1;
}
else //if(qam_sym.imag() == 3)
{
//bits[0] = 0; bits[1] = 1;
bits[0] = 1; bits[1] = 0;
}
if(qam_sym.data.x == -3)
{
bits[2] = bits[3] = 0;
}
else if(qam_sym.data.x == -1)
{
//bits[2] = 1; bits[3] = 0;
bits[2] = 0; bits[3] = 1;
}
else if(qam_sym.data.x == 1)
{
bits[2] = bits[3] = 1;
}
else //if(qam_sym.real() == 3)
{
//bits[2] = 0; bits[3] = 1;
bits[2] = 1; bits[3] = 0;
}
}
}
__kernel void sphere_decoder(const int block_length,
const float noise_power,
__global float *block_data,
const int LIST_SIZE,
__global float *llr,
__global float *cand_dist,
__global float *cand_sym,
__global float *R_re,
__global float *R_im,
__global float *qr_noise_re,
__global float *qr_noise_im)
{
//printf("f hello \n");
uint tid = get_global_id(0);
//printf("f hello %d\n",tid);
int bitstream[Mt*bitSize], stride = Mt*bitSize;
for(int i=0; i<stride; ++i)
bitstream[i] = block_data[tid*stride + i];
comp x[Mt], y[Mr], best_sig[Mr], parents[Mr];
int level = Mt-1, index_level[Mr];
for(int i = 0; i<Mt; ++i)
{
get_cord_val(bitstream+i*Mt, &x[i]);
}
//// generate the y vector
//
comp temp_const0, temp2;
temp_const0.data.x = 0.0f;
temp_const0.data.y = 0.0f;
for(int i=0; i<Mr; ++i)
{
equal(&y[i],temp_const0);
equal(&best_sig[i],temp_const0);
equal(&parents[i],temp_const0);
index_level[i] = 0;
for(int j=0; j<Mt; ++j)
{
temp2.data.x = R_re[i*Mt+j];
temp2.data.y = R_im[i*Mt+j];
y[i] = add(y[i],mul(temp2, x[j]));
// printf("IN LOOP %f+j%f\n",y[i].data.x, y[i].data.y);
}
// adding noise here
temp2.data.x = qr_noise_re[i];
temp2.data.y = qr_noise_im[i];
//y[i] = add(y[i], temp2); //FIXME disabling the noise
}
stack_class s;
init_stack(&s);
node nn;
nn.level = level+1;
nn.index = -1;
nn.ped = 0.0f;
push(&s, nn);
float ped_parent = 0.0f, SC_ML = initial_SC, max_cand_dist = 0.0f,
bit_plus_1_dist, bit_minus_1_dist;
comp cor[Mt*MOD_SCHEME];
int list_size = 0, max_cand_index = 0, i, j,
curr_level = nn.level, temp_bits[bitSize];
while(!is_empty(&s))
{
for(j=curr_level-1; j>=0; --j)
{
get_symbol(y[j], parents, R_re+j*Mt, R_im+j*Mt, j, Mt, cor+j*MOD_SCHEME);
nn.ped = getPed(y[j], R_re+j*Mt, R_im+j*Mt, parents, cor[j*MOD_SCHEME + index_level[j]], j, ped_parent);
equal(&parents[j],cor[j*MOD_SCHEME+index_level[j]]);
nn.index = index_level[j]++;
nn.level = j;
ped_parent = nn.ped;
if(j == 0 && nn.ped < SC_ML) //reached the leaf node
{
if(list_size < LIST_SIZE)
{
cand_dist[tid*LIST_SIZE + list_size] = nn.ped;
if(cand_dist[tid*LIST_SIZE + list_size] > max_cand_dist)
{
max_cand_dist = cand_dist[tid*LIST_SIZE + list_size];
max_cand_index = list_size;
}
for(int k=0; k<Mt; k++)
{
get_bits(cor[k*MOD_SCHEME+index_level[k]-1],temp_bits);
for(int l=0;l<bitSize;++l)
{
cand_sym[list_size*block_length+ tid*stride + k*bitSize+l] = (float)temp_bits[l];
if(temp_bits[l]==1)
cand_sym[list_size*block_length+ tid*stride + k*bitSize+l] = -1.0f;
else if(temp_bits[l]==0)
cand_sym[list_size*block_length+ tid*stride + k*bitSize+l] = 1.0f;
}
}
list_size++;
}
else if(nn.ped < max_cand_dist)
{
cand_dist[max_cand_index+tid*LIST_SIZE] = nn.ped;
/* Replace this candidate */
for(int k=0; k<Mt; ++k)
{
get_bits(cor[k*MOD_SCHEME+index_level[k]-1],temp_bits);
for(int l=0;l<bitSize;++l)
{
cand_sym[max_cand_index*block_length+ tid*stride + k*bitSize+l] = (float)temp_bits[l];
if(temp_bits[l]==1)
cand_sym[max_cand_index*block_length+ tid*stride + k*bitSize+l] = -1.0f;
if(temp_bits[l]==0)
cand_sym[max_cand_index*block_length+ tid*stride + k*bitSize+l] = 1.0f;
}
}
/* find the next max candidate */
max_cand_dist = -1.0f;
for(int k=0; k<LIST_SIZE; ++k)
{
if(cand_dist[tid*LIST_SIZE + k] > max_cand_dist)
{
max_cand_dist = cand_dist[tid*LIST_SIZE + k];
max_cand_index = k;
}
}
}
if(list_size == LIST_SIZE)
SC_ML = max_cand_dist;
}
else if(nn.ped >= SC_ML && list_size == LIST_SIZE) //tree pruning
{
break;
}
if(j>=1)
push(&s, nn);
}//end for curr_level
nn = pop(&s);
curr_level = nn.level;
ped_parent = nn.ped;
//
while(nn.index >= MOD_SCHEME)
{
index_level[nn.level] = 0;
nn = pop(&s);
}
//
///* going to the next child */
///* set the level for the children below curr level to zero */
for(i=curr_level-2;i>=0; --i)
index_level[i] = 0;
//
if(index_level[curr_level-1] < MOD_SCHEME-1 || (nn.index == -1 && index_level[curr_level-1] == MOD_SCHEME-1))
push(&s,nn);
} //end of while
/* calculate LLRs here */
for(int l=0; l<stride; ++l) //for each bit of the MIMO symbol
{
bit_plus_1_dist = bit_minus_1_dist = INT_MAX*1.0f;
//if(tid == 0)
//printf("INT MAX is %f\n", INT_MAX*1.0f);
for(int k = 0; k<LIST_SIZE; ++k)
{
if(cand_sym[k*block_length+ tid*stride +l] == -1.0f && cand_dist[tid*LIST_SIZE+k] < bit_minus_1_dist)
{
bit_minus_1_dist = cand_dist[tid*LIST_SIZE+k];
}
else if(cand_sym[k*block_length+ tid*stride +l] == 1.0f && cand_dist[tid*LIST_SIZE+k] < bit_plus_1_dist )
{
bit_plus_1_dist = cand_dist[tid*LIST_SIZE+k];
}
}
//if(bit_minus_1_dist != 0.0f)
// printf("got a problem\n");
llr[tid*stride + l] = (1/noise_power)*(bit_minus_1_dist - bit_plus_1_dist);
if(tid == 0)
{
printf("LLRs:%d %f %f %f %f\n",l, bit_plus_1_dist, bit_minus_1_dist, llr[tid*stride+l], cand_sym[1920]);
}
if(llr[tid*stride+l] > CLIP)
llr[tid*stride + l] = CLIP;
else if(llr[tid*stride+l]<-CLIP)
llr[tid*stride+l] = -CLIP;
}
}