Why do these two kernels behave differently?

Hi, everyone
I wrote two kernels; the relevant snippets are as follows:

	
// Code 1: the inner loop over i is written directly inside the x loop.
// Outer loop: y starts at the work-group id and advances by the number of groups.
for (uint y = get_group_id(0); y < nHeight; y += get_num_groups(0)) 
{
     // Inner loop: x starts at the local work-item id and advances by the group size.
     for (uint x = get_local_id(0); x < nWidth; x += get_local_size(0))
     {
           for(int i=0;i<N;++i){......}
    }
}

	
// Code 2: same y/x loops as code 1, but the loop over i lives in function().
for (uint y = get_group_id(0); y < nHeight; y += get_num_groups(0)) 
{
     for (uint x = get_local_id(0); x < nWidth; x += get_local_size(0))
     {
          function()
     }
}
// Helper holding the loop over i that code 1 writes inline.
void function()
{
    for(int i=0;i<N;++i){......}
}

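Code 1 (the first snippet, with the loop written inline) gives the wrong result, while code 2 (the second snippet, with the loop moved into a function) gives the right result. Why? I think the GPU compiler must be applying some optimization to my code that I don't want.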

What you describe could be a bug in the compiler, or it could be that the source code is doing something wrong in both cases and by sheer luck the output is correct in one of the cases.

Those loops look very strange. What is it that you are trying to do? Can you show us the full source code?

/*
 * Accumulate the weighted filter taps for one output column of a row.
 *
 * iLen    - number of taps on each side of x; taps run from -iLen to +iLen.
 * src     - pixel row, indexed here only with columns in [0, nWidth).
 * pCo     - coefficients, read at indices 0 .. 2*iLen.
 * nTotal  - running sum of the coefficients actually applied (added to,
 *           never reset here; the caller must initialise it).
 * nWidth  - number of valid columns in src.
 * x       - centre column of the filter window.
 * result  - running weighted sum of pixels (added to, never reset here).
 *
 * iLen, nWidth and x are signed on purpose: the tap offset starts at -iLen
 * and x + offset can be negative at the left edge of the row.
 */
void calcX(int iLen, __global const uint* src, __global const float* pCo, float* nTotal, int nWidth, int x, float4* result)
{
	for (int tap = -iLen; tap <= iLen; ++tap)
	{
		const int col = x + tap;

		// Taps falling outside the row contribute nothing to either sum.
		if (col < 0 || col >= nWidth)
		{
			continue;
		}

		const float weight = pCo[iLen + tap];

		// rgbaUintToFloat4 is defined elsewhere; presumably it unpacks a
		// packed RGBA pixel into a float4 - confirm against its definition.
		*result += weight * rgbaUintToFloat4(src[col]);
		*nTotal += weight;
	}
}

/*
 * Horizontal filter pass over an nWidth x nHeight image of uint pixels.
 *
 * src     - input image, nWidth pixels per row.
 * dst     - output image, same layout as src.
 * nWidth  - pixels per row.
 * nHeight - number of rows.
 * iLen    - number of taps on each side of the centre pixel.
 * pCo     - filter coefficients; calcX reads indices 0 .. 2*iLen.
 *
 * Work distribution (dimension 0 only): rows are strided across work-groups
 * and the columns of a row are strided across the work-items of the group.
 */
__kernel void GaussHor(__global const uint *src, __global uint *dst, uint nWidth, uint nHeight, const uint iLen,__global const float *pCo)
{
	for (uint y = get_group_id(0); y < nHeight; y += get_num_groups(0))
	{
		const __global uint*	pSrc	=src+y*nWidth;
		__global uint*	pDst	=dst+y*nWidth;
		for (uint x = get_local_id(0); x < nWidth; x += get_local_size(0))
		{
			// Fresh accumulators for every output pixel; calcX only adds to them.
			float4	result	=(float4)0.0f;
			float	nTotal	=0.0f;

			// calcX needs signed values: its tap offset runs from -iLen to
			// +iLen. Written against the uint iLen directly, a test such as
			// "i <= iLen" would promote a negative int i to unsigned and the
			// loop body would never run for iLen > 0.
			calcX((int)iLen, pSrc, pCo, &nTotal, (int)nWidth, (int)x, &result);

			// Dividing by the sum of the coefficients actually applied
			// renormalises pixels whose window was clipped at the row edges.
			pDst[x]	=rgbaFloat4ToUint(result/nTotal);
		}
	}
}

This code applies a Gaussian blur to an image; I based it on code provided by NVIDIA. The parameter iLen is the Gaussian blur length, and the parameter pCo holds the coefficients used for the blur.

The code above is right, but if I write the for loop directly in GaussHor() instead of calling calcX(), the result is wrong. So what is wrong? Thanks for your reply.