dear all:
i have optimized a reciprocal function from DG_MATH_Recip to
DG_MATH_Recip_opt, but it still too slow, any one know how to make reciprocal more fast??(on ARM11)
DG_F
DG_MATH_Recip(DG_F a)
{
DGint32 exp;
DGfixed32 x;
DGfixed32 sign;
/* 1/(4x) */
static const DGuint16 rcp_tab[] = { /* domain 0.5 .. 1.0-1/16 */
0x8000, 0x71c7, 0x6666, 0x5d17, 0x5555, 0x4ec4, 0x4924, 0x4444
};
// check zero
if (a == DGX_ZERO) return DGX_PINF;
// check negative
if(a < DGX_ZERO){
a = -a;
sign = -1;
}else{
sign = 1;
}
x = a;
exp = 31;
if (x & 0xffff0000) { exp -= 16; x >>= 16; }
if (x & 0xff00) { exp -= 8; x >>= 8; }
if (x & 0xf0) { exp -= 4; x >>= 4; }
if (x & 0xc) { exp -= 2; x >>= 2; }
if (x & 0x2) { exp -= 1; }
x = rcp_tab[(a>>(28-exp))&0x7]<<2;
exp -= 16;
if (exp <= 0)
x >>= -exp;
else
x <<= exp;
/* two iterations of newton-raphson x = x(2-ax) */
x = fMul(x,(DGX_TWO - fMul(a,x))); //(x<<1)
x = fMul(x,(DGX_TWO - fMul(a,x))); //(x<<1)
return (x * sign);
}
__inline
DG_F DG_MATH_Recip_opt(DG_F a)
{
DGint32 exp;
DGfixed32 x;
DGfixed32 sign;
// check zero
if (a == DGX_ZERO)
return DGX_PINF;
// check negative
sign = 1;
if(a < DGX_ZERO){
a = -a;
sign = -1;
}
__asm
{
CLZ exp, a;
}
x = DG_context->g_pTRcp[(a>>(26-exp))&0x1f];
exp -= 16;
if (exp <= 0)
x >>= -exp;
else
x <<= exp;
// two iterations of newton-raphson x = x(2-ax)
x = fMul(x,(DGX_TWO - fMul(a,x))); //(x<<1)
//x = fMul(x,(DGX_TWO - fMul(a,x))); //(x<<1)
return (x * sign);
}