使用OpenMP从Fortran子例程中导致错误的结果和崩溃

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了使用OpenMP从Fortran子例程中导致错误的结果和崩溃相关的知识,希望对你有一定的参考价值。

我编写了以下代码,然后尝试使用OpenMP来并行化它。但是,在使用f2py编译以下OpenMP代码之后,Python在运行时总会生成某些错误。没有错误消息,只有数字有点关闭,每当我用f2py编译它并在Python中运行时,它就会杀死内核。

我想知道这是否与我的并行区域有关。我总是对哪些变量设置为私有有点困惑所以任何人都可以观察到任何错误?

subroutine simulate_omp(m,nt,s0,l,d,a,numthreads,x,y,psi)
!Pedestrian motion model
!input variables:
!n = m^2 = number of students
!nt: number of time steps
!s0: student speed
!l: initial spacing between students
!d: student motion influence by all other students within distance <= d
!a: noise amplitude
!numthreads: number of threads to use in parallel regions
!output variables:
!x,y: all m^2 student paths from i=1 to nt+1
!psi: synchronization parameter, store at all nt+1 times (including initial 
condition)
use omp_lib
implicit none
integer, intent(in) :: m,nt,numthreads
real(kind=8), intent(in) :: s0,l,d,a
real(kind=8), dimension(m*m,nt+1), intent(out) :: x,y
real(kind=8), dimension(nt+1), intent(out) :: psi
real(kind=8), dimension(m*m,nt+1) :: xtemp,ytemp,u,v
real(kind=8), dimension(m*m,nt) :: usum,vsum,umean,vmean
real(kind=8) :: r(m*m)
real(kind=8),parameter :: pi = 4*atan(1.0_8)
integer :: i1,j1,k1,i2,j2,k2,count

!$call omp_set_num_threads(numthreads)
! initialize student positions
x = 0.d0
y = 0.d0
k1 = 0
do i1 = 1,m
  do j1=1,m
    k1 = k1 + 1
    x(k1,1) = (j1-1)*l/2 - (m-1)*l/4
    y(k1,1) = (i1-1)*l/2 - (m-1)*l/4
  end do
end do
x(:,1) = x(:,1)/(m-1)
y(:,1) = y(:,1)/(m-1)

! initialize
xtemp(:,1) = x(:,1)
ytemp(:,1) = y(:,1)
call random_number(r)
u(:,1) = s0*cos(r*2*pi-pi)
v(:,1) = s0*sin(r*2*pi-pi)
psi(1) = sqrt(sum(u(:,1))**2+sum(v(:,1)**2))/dble(m)/dble(m)/s0

do i2 = 1,nt
  !$OMP parallel do private(j2,k2,l)
  do j2 = 1,m*m
    usum(j2,i2) = 0
    vsum(j2,i2) = 0
    count = 0
    !$OMP parallel do reduction(+:usum,vsum,count)
    do k2 = 1,m*m
      if ((xtemp(k2,i2)-xtemp(j2,i2))**2+(ytemp(k2,i2)-ytemp(j2,i2))**2<=d**2) 
then
        usum(j2,i2) = usum(j2,i2)+u(k2,i2)
        vsum(j2,i2) = vsum(j2,i2)+v(k2,i2)
        count = count+1
      end if
    end do
    !$OMP end parallel do
    umean(j2,i2) = usum(j2,i2)/dble(count)
    vmean(j2,i2) = vsum(j2,i2)/dble(count)
    u(j2,i2+1) = s0*cos(atan(vmean(j2,i2)/umean(j2,i2))+a*(r(j2)*2*pi-pi))
    v(j2,i2+1) = s0*sin(atan(vmean(j2,i2)/umean(j2,i2))+a*(r(j2)*2*pi-pi))
    xtemp(j2,i2+1) = xtemp(j2,i2)+u(j2,i2+1)
    ytemp(j2,i2+1) = ytemp(j2,i2)+v(j2,i2+1)

    ! boundary conditions
    if (xtemp(j2,i2+1)>l) then
      xtemp(j2,i2+1) = xtemp(j2,i2+1)-2*l
    else
      if (xtemp(j2,i2+1)<-l) then
        xtemp(j2,i2+1) = xtemp(j2,i2+1)+2*l
      end if
    end if
    if (ytemp(j2,i2+1)>l) then
      ytemp(j2,i2+1) = ytemp(j2,i2+1)-2*l
    else
      if (ytemp(j2,i2+1)<-l) then
        ytemp(j2,i2+1) = ytemp(j2,i2+1)+2*l
      end if
    end if
  end do
  !$OMP end parallel do
  psi(i2+1) = sqrt(sum(u(:,i2+1))**2+sum(v(:,i2+1))**2)/dble(m)/dble(m)/s0
end do
x(:,1:nt+1) = xtemp(:,1:nt+1)
y(:,1:nt+1) = ytemp(:,1:nt+1)

end subroutine simulate_omp
答案

参数l使用intent(in)声明,并且不在循环中修改,因此不需要将其声明为private。以下是没有外部并行循环的建议:

subroutine simulate_omp(m,nt,s0,l,d,a,numthreads,x,y,psi)
!Pedestrian motion model
!input variables:
!n = m^2 = number of students
!nt: number of time steps
!s0: student speed
!l: initial spacing between students
!d: student motion influence by all other students within distance <= d
!a: noise amplitude
!numthreads: number of threads to use in parallel regions
!output variables:
!x,y: all m^2 student paths from i=1 to nt+1
!psi: synchronization parameter, store at all nt+1 times (including initial 
condition)
use omp_lib
implicit none
integer, intent(in) :: m,nt,numthreads
real(kind=8), intent(in) :: s0,l,d,a
real(kind=8), dimension(m*m,nt+1), intent(out) :: x,y
real(kind=8), dimension(nt+1), intent(out) :: psi
real(kind=8), dimension(m*m,nt+1) :: xtemp,ytemp,u,v
real(kind=8), dimension :: usum,vsum,umean,vmean
real(kind=8) :: r(m*m)
real(kind=8),parameter :: pi = 4*atan(1.0_8)
integer :: i1,j1,k1,i2,j2,k2,count

!$call omp_set_num_threads(numthreads)
! initialize student positions
x = 0.d0
y = 0.d0
k1 = 0
do i1 = 1,m
  do j1=1,m
    k1 = k1 + 1
    x(k1,1) = (j1-1)*l/2 - (m-1)*l/4
    y(k1,1) = (i1-1)*l/2 - (m-1)*l/4
  end do
end do
x(:,1) = x(:,1)/(m-1)
y(:,1) = y(:,1)/(m-1)

! initialize
xtemp(:,1) = x(:,1)
ytemp(:,1) = y(:,1)
call random_number(r)
u(:,1) = s0*cos(r*2*pi-pi)
v(:,1) = s0*sin(r*2*pi-pi)
psi(1) = sqrt(sum(u(:,1))**2+sum(v(:,1)**2))/dble(m)/dble(m)/s0

do i2 = 1,nt
  do j2 = 1,m*m
    usum = 0
    vsum = 0
    count = 0
    !$OMP parallel do private(k2), reduction(+:usum,vsum,count)
    do k2 = 1,m*m
      if ((xtemp(k2,i2)-xtemp(j2,i2))**2+(ytemp(k2,i2)-ytemp(j2,i2))**2<=d**2) then
        usum = usum+u(k2,i2)
        vsum = vsum+v(k2,i2)
        count = count+1
      end if
    end do
    !$OMP end parallel do
    umean = usum/dble(count)
    vmean = vsum/dble(count)
    u(j2,i2+1) = s0*cos(atan(vmean/umean)+a*(r(j2)*2*pi-pi))
    v(j2,i2+1) = s0*sin(atan(vmean/umean)+a*(r(j2)*2*pi-pi))
    xtemp(j2,i2+1) = xtemp(j2,i2)+u(j2,i2+1)
    ytemp(j2,i2+1) = ytemp(j2,i2)+v(j2,i2+1)

    ! boundary conditions
    if (xtemp(j2,i2+1)>l) then
      xtemp(j2,i2+1) = xtemp(j2,i2+1)-2*l
    else
      if (xtemp(j2,i2+1)<-l) then
        xtemp(j2,i2+1) = xtemp(j2,i2+1)+2*l
      end if
    end if
    if (ytemp(j2,i2+1)>l) then
      ytemp(j2,i2+1) = ytemp(j2,i2+1)-2*l
    else
      if (ytemp(j2,i2+1)<-l) then
        ytemp(j2,i2+1) = ytemp(j2,i2+1)+2*l
      end if
    end if
  end do
  psi(i2+1) = sqrt(sum(u(:,i2+1))**2+sum(v(:,i2+1))**2)/dble(m)/dble(m)/s0
end do
x(:,1:nt+1) = xtemp(:,1:nt+1)
y(:,1:nt+1) = ytemp(:,1:nt+1)

end subroutine simulate_omp

您可以计时并将其与使用私有(j2,k2,umean,vmean,usum,vsum,count),shared(u,v,xtemp,ytemp)并行化的外循环进行比较。确保后面的测试将OMP_NESTED设置为true。

以上是关于使用OpenMP从Fortran子例程中导致错误的结果和崩溃的主要内容,如果未能解决你的问题,请参考以下文章

C++ 从 dll 调用 FORTRAN 子例程

fortran 错误地调用子例程

从 C++ 调用带有可选参数的 Fortran 子例程

从 C++ 调用带有可选参数的 Fortran 子例程

从 c++ 调用 FORTRAN 子例程会产生非法参数值

如何将可分配数组传递给 Fortran 中的子例程