// Host-side setup for the BitProduct kernel.
//
// N is the number of bits per matrix row/column and must be evenly divisible
// by 64, because the bits are packed into 64-bit words (N/64 words per row,
// N*N/64 words per matrix).
int N = 256;

// Bit-packed input matrices. For N = 256, one row is 256 bits = 4 x 64-bit words.
// new[] on a scalar type leaves the contents uninitialized; they must be
// filled before being copied to the device.
unsigned long long *A = new unsigned long long[N * N / 64];
unsigned long long *B = new unsigned long long[N * N / 64];

// Output matrix: one int per (row of A, row of B) pair, N*N entries in total.
int *C = new int[N * N];

// One work-item per output element: an N x N two-dimensional global range.
// The explicit casts avoid a narrowing conversion from int inside the braces.
const size_t global[2] = { static_cast<size_t>(N), static_cast<size_t>(N) };

// Arguments: no global offset (NULL), implementation-chosen local work size
// (NULL), empty wait list (0, NULL); completion is reported through `event`.
// NOTE(review): the returned cl_int status is not checked here — confirm the
// caller handles enqueue failures.
clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, NULL, 0, NULL, &event);
/*
 * BitProduct: each work-item (i, j) = (get_global_id(0), get_global_id(1))
 * walks the N/64 64-bit words starting at A[i*(N/64)] and at B[j*(N/64)],
 * XORs them pairwise, and accumulates the population count of each result.
 * The total (number of bit positions where the two word runs differ) is
 * stored as an int in C[i*N + j].
 *
 * There is no bounds guard: the kernel relies on the launch range keeping
 * both global ids below N.
 */
__kernel void BitProduct(const int N, const __global ulong* A, const __global ulong* B, __global int* C)
{
    const int row = get_global_id(0);
    const int col = get_global_id(1);

    /* Number of 64-bit words per packed row; loop-invariant. */
    const int words = N / 64;

    /* Base of the word run read from each input for this work-item. */
    const __global ulong* a_words = A + row * words;
    const __global ulong* b_words = B + col * words;

    ulong total = 0;
    int w = 0;
    while (w < words) {
        total += popcount(a_words[w] ^ b_words[w]);
        ++w;
    }

    C[row * N + col] = (int) total;
}
// Block: a plain aggregate holding a 4x4 array of char (16 chars in total),
// addressed as item[first][second] with both indices in the range 0..3.
struct Block {
char item[4][4]; // 4 groups of 4 chars, stored contiguously
};
// Heap-allocate an array of 11 Block objects; `keys` points at the first one.
// The elements are not explicitly initialized by this expression.
// NOTE(review): no matching delete[] is visible in this snippet — confirm who
// owns and releases the array.
Block *keys = new Block[11];
// Access pattern: select Block number 3, then the char at item[2][2].
// As written, this expression statement only names the element and discards
// the value; it neither stores nor uses it.
keys[3].item[2][2];