float Q_rsqrt( float number )
{
  long i;
  float x2, y;
  const float threehalfs = 1.5F;
 
  x2 = number * 0.5F;
  y  = number;
  i  = * ( long * ) &y;     // evil floating point bit level hacking
  i  = 0x5f3759df - ( i >> 1 );  // what the fuck?
  y  = * ( float * ) &i;
  y  = y * ( threehalfs - ( x2 * y * y ) );   // 1st iteration
// y  = y * ( threehalfs - ( x2 * y * y ) );   // 2nd iteration, this can be removed
 
  return y;
}
从代码可以看出算法的关键为牛顿法的初值,也就是引起很多人关注和研究的魔法数 0x5f3759df. 下面是该算法的Fortran实现:
module invsqrt_mod
  use iso_fortran_env, only : sp=>real32
  implicit none
contains
  function invsqrt(x) result(y)
    real(sp), intent(in)  :: x
    real(sp) :: y, xhalf
    integer :: i
    xhalf=0.5_sp*x
    i=transfer(x,i)
    i=z'5f3759df'-shiftr(i,1)
    y=transfer(i,y)
    y=y*(1.5_sp-xhalf*y*y)
!    y=y*(1.5_sp-xhalf*y*y)
!    y=y*(1.5_sp-xhalf*y*y)
  end function invsqrt
end module invsqrt_mod
program test
  use invsqrt_mod
  use iso_fortran_env, only : sp=>real32
  implicit none
  real(sp) :: x
  real :: t1, t2
  integer :: i, num
  print*,'num='
  read*,num
  call cpu_time(t1)
  do i=1,num
     x=1._sp/sqrt(real(i,sp))
  end do
  call cpu_time(t2)
  print*,'cpu_time by sqrt=', t2-t1
  call cpu_time(t1)
  do i=1,num
     x=invsqrt(real(i,sp))
  end do
  call cpu_time(t2)
  print*,'cpu_time by invsqrt=', t2-t1
  print*,'x='
  read*,x
  print*,'sqrt(x)=',sqrt(x)
  print*,'by invsqrt=',x*invsqrt(x)
end program test
在我的计算机上上述代码采用 -O3 优化时与采用内置函数 sqrt 的计算结果在计算时间上几乎相同,如果编译时不进行优化 则比内置函数要慢很多。 注意代码实现上也可以用 equivalence 语句代替 transfer 内置函数,但因为前者已被声明为过时特性,这里没有采用。