module caf_intrinsics implicit none ! ! Co-Array Fortran intrinsics missing from cf90 3.1 on Cray T3E. ! ! sync_file, sync_memory, ! sync_all, sync_team, ! start_critical, end_critical. ! ! Also includes a reference implementation of sync_images. ! This may be slower than the intrinsic version, but has ! debugging and deadlock detection capabilities. ! ! Call caf_init once on each image at the start of the program. ! Add use caf_intrinsics anywhere these "intrinsics" are called. ! ! Environment variables: ! caf_sync_debug is set to turn on debugging (default off). ! caf_deadlock_seconds controls deadlock detection (default none). ! ! Call caf_reset to change debugging or deadlock detection status. ! ! Alan J. Wallcraft, NRL, October 1998. ! With modifications by John Reid, November 1998. ! integer, parameter,private :: int64=selected_int_kind(18) integer(int64), parameter,private :: spin_per_sec=5*2**19 ! T3E-900 logical, parameter,private :: spin_local=.true. integer(int64), private, save :: max_spin,max_spin_old logical, private, save :: debug,debug_old integer, private, save :: me,nimg,critical_count,iostat integer, private, save :: critical_image[*] integer, allocatable, save, private :: all(:) integer(int64), allocatable, save, private :: count_team(:)[:] integer(int64), allocatable, save, private :: count_image(:)[:] integer(int64), allocatable, save, private :: count_images(:)[:] interface sync_images module procedure sync_images_0 module procedure sync_images_s module procedure sync_images_a end interface interface sync_all module procedure sync_all_0 module procedure sync_all_s module procedure sync_all_a end interface interface sync_team module procedure sync_team_s module procedure sync_team_ss module procedure sync_team_sa module procedure sync_team_a module procedure sync_team_as module procedure sync_team_aa end interface contains subroutine caf_init() ! ! initialize co-array data structures. ! call once on each image at start of program. ! integer :: lenname,lenval,ierror,i integer(int64) :: deadlock character (256) :: cenv me = this_image() nimg = num_images() critical_count = 0 critical_image = 0 allocate( all(nimg) ) all = (/ (i,i=1,nimg) /) ! ! initialize sync co-array to zero. ! allocate( count_team(nimg)[*] ) count_team(:) = 0 allocate( count_image(nimg)[*] ) count_image(:) = 0 allocate( count_images(nimg)[*] ) count_images(:) = 0 call sync_all() ! ! env caf_sync_debug controls debugging. ! default is .false., i.e. no debugging. ! call caf_reset after caf_init to change debugging status. ! cenv = ' ' lenname = 0 call pxfgetenv('CAF_SYNC_DEBUG',lenname,cenv,lenval,ierror) debug = cenv.ne.' ' debug_old = debug ! ! env caf_deadlock_seconds controls deadlock detection, ! sync_team will spin for this long before aborting. ! also applied to sync_all when debugging. ! default is 0, i.e. spin forever. ! call caf_reset after caf_init to change deadlock detection. ! cenv = ' ' lenname = 0 call pxfgetenv('CAF_DEADLOCK_SECONDS',lenname,cenv,lenval,ierror) if (cenv.eq.' ') then deadlock = 0 else read(cenv,'(I9)') deadlock endif if (deadlock.le.0) then max_spin = huge(max_spin) - 1 ! spin forever elseif (deadlock.ge.huge(max_spin)/spin_per_sec) then max_spin = huge(max_spin) - 1 ! spin forever else max_spin = deadlock*spin_per_sec endif max_spin_old = max_spin end subroutine caf_init subroutine caf_reset(sync_debug,deadlock_seconds) logical, optional :: sync_debug integer, optional :: deadlock_seconds ! ! reset debug and/or deadlock detection status. ! go back to old values if called with no arguments. ! call anytime after caf_init. ! if(present(sync_debug))then debug_old = debug debug = sync_debug else if(.not.present(deadlock_seconds))then ! called with no arguments: ! put debuging and deadlock detection back to where ! they were before the latest set of caf_reset calls. debug = debug_old max_spin = max_spin_old end if if(present(deadlock_seconds))then ! sync_team will spin for deadlock_seconds before aborting. ! also applied to sync_all when debugging. max_spin_old = max_spin if (deadlock_seconds.le.0) then max_spin = huge(max_spin) - 1 ! spin forever elseif (deadlock_seconds.ge.huge(max_spin)/spin_per_sec) then max_spin = huge(max_spin) - 1 ! spin forever else max_spin = deadlock_seconds*spin_per_sec endif end if end subroutine caf_reset subroutine sync_file(unit) integer, intent(in) :: unit ! ! flush an i/o unit. ! cf90 does not implement team=, so this could be a no-op, ! but call flush anyway. note that the flush may fail, ! depending on how the file was opened. ! call flush(unit,iostat) end subroutine sync_file subroutine sync_memory() ! ! flush all co-arrays in the local scope. ! assumes shmem_quiet also works for co-array puts. ! call shmem_quiet() end subroutine sync_memory subroutine sync_images_0() ! ! sync with all images. ! if (debug) then call sync_team_aa(all,all) else call shmem_barrier_all() ! Use hardware barrier endif end subroutine sync_images_0 subroutine sync_images_s(image) integer, intent (in) :: image ! ! sync with single other image image. ! only calls with scalar arguments sync with each other. ! integer(int64) :: mcount,spin if (image==me) then ! should be a no-op, but call sync_memory() anyway. call sync_memory() return endif if (debug) then if (1>image .or. image>nimg) then write(6,*) 'error in sync_images - out of range image' write(6,*) 'this_image(),image = ',me,image call flush(6,iostat) stop endif endif if (spin_local) then ! ! update count remotely then spin locally. ! fixed number of put operations. ! mcount = count_image(me)[image] + 1 count_image(me)[image] = mcount call sync_memory() do spin = 1,max_spin call sync_memory() if (mcount.le.count_image(image)) exit enddo if (spin.gt.max_spin) then write(6,*) 'error in sync_images - deadlock detected' write(6,*) 'this_image(),image = ',me,image write(6,*) 'this_image(),count = ', & me,mcount,count_image(image) write(6,*) 'this_image(),spin = ', & me,max_spin,spin call flush(6,iostat) stop endif else ! ! update count locally then spin remotely. ! fewer remote operations (gets) in best case, but ! spinning creates large number of remote operations. ! mcount = count_image(image) + 1 count_image(image) = mcount call sync_memory() do spin = 1,max_spin call sync_memory() if (mcount.le.count_image(me)[image]) exit enddo if (spin.gt.max_spin) then write(6,*) 'error in sync_images - deadlock detected' write(6,*) 'this_image(),image = ',me,image write(6,*) 'this_image(),count = ', & me,mcount,count_image(image) write(6,*) 'this_image(),spin = ', & me,max_spin,spin call flush(6,iostat) stop endif endif end subroutine sync_images_s subroutine sync_images_a(image) integer, intent (in) :: image(:) ! ! sync with all images in image(:). ! only calls with array arguments sync with each other. ! must be no repeated elements in image(:). ! this_image() must be in image(:). ! ! Note: the CF90 version may allow repeated elements in image(:). ! integer :: k integer(int64) :: mcount,spin if (debug) then if (.not.any(image==me)) then write(6,*) 'error in sync_images - this_image() not in image' write(6,*) 'this_image(),image = ',me,image call flush(6,iostat) stop endif do k= 1,size(image) if (1>image(k) .or. image(k)>nimg) then write(6,*) 'error in sync_images - out of range image element' write(6,*) 'this_image(),k,image(k) = ',me,k,image(k) call flush(6,iostat) stop endif if (count(image==image(k))/=1) then write(6,*) 'error in sync_images - repeated element in image' write(6,*) 'this_image(),image = ',me,image call flush(6,iostat) stop endif enddo endif if (spin_local) then ! ! update count remotely then spin locally. ! fixed number of put operations. ! do k = 1,size(image) mcount = count_images(me)[image(k)] + 1 count_images(me)[image(k)] = mcount enddo call sync_memory() do k = 1,size(image) mcount = count_images(me)[image(k)] do spin = 1,max_spin call sync_memory() if (mcount.le.count_images(image(k))) exit enddo if (spin.gt.max_spin) then write(6,*) 'error in sync_images - deadlock detected' write(6,*) 'this_image(),image(k),image = ',me,image(k),image write(6,*) 'this_image(),count = ', & me,mcount,count_images(image(k)) write(6,*) 'this_image(),spin = ', & me,max_spin,spin call flush(6,iostat) stop endif enddo else ! ! update count locally then spin remotely. ! fewer remote operations (gets) in best case, but ! spinning creates large number of remote operations. ! do k = 1,size(image) mcount = count_images(image(k)) + 1 count_images(image(k)) = mcount enddo call sync_memory() do k = 1,size(image) mcount = count_images(image(k)) do spin = 1,max_spin call sync_memory() if (mcount.le.count_images(me)[image(k)]) exit enddo if (spin.gt.max_spin) then write(6,*) 'error in sync_images - deadlock detected' write(6,*) 'this_image(),image(k),image = ',me,image(k),image write(6,*) 'this_image(),count = ', & me,mcount,count_images(image(k)) write(6,*) 'this_image(),spin = ', & me,max_spin,spin call flush(6,iostat) stop endif enddo endif end subroutine sync_images_a subroutine sync_all_0() ! ! sync with all images. ! if (debug) then call sync_team_aa(all,all) else call shmem_barrier_all() ! Use hardware barrier endif end subroutine sync_all_0 subroutine sync_all_s(wait) integer, intent (in) :: wait ! ! sync with all images. ! early return possible when image wait arrives. ! call sync_all_a( (/wait/) ) end subroutine sync_all_s subroutine sync_all_a(wait) integer, intent (in) :: wait(:) ! ! sync with all images. ! early return possible when all images in wait arrive. ! integer :: k if (debug) then do k= 1,size(wait) if (1>wait(k) .or. wait(k)>nimg) then write(6,*) 'error in sync_all - out of range wait element' write(6,*) 'this_image(),k,wait(k) = ',me,k,wait(k) call flush(6,iostat) stop endif if (count(wait==wait(k))/=1) then write(6,*) 'error in sync_all - repeated element in wait' write(6,*) 'this_image(),wait = ',me,wait call flush(6,iostat) stop endif enddo endif if (debug) then call sync_team_aa(all,wait) else call shmem_barrier_all() ! Use hardware barrier (ignore wait) endif end subroutine sync_all_a subroutine sync_team_s(team) integer, intent (in) :: team ! ! sync with single other image team. ! if (debug) then if (team==me) then write(6,*) 'error in sync_team - scalar team == this_image()' write(6,*) 'this_image(),team = ',me,team call flush(6,iostat) stop endif endif call sync_team_aa( (/me,team/), (/me,team/) ) end subroutine sync_team_s subroutine sync_team_ss(team,wait) integer, intent (in) :: team,wait ! ! sync with single other image team. ! if (debug) then if (team==me) then write(6,*) 'error in sync_team - scalar team == this_image()' write(6,*) 'this_image(),team = ',me,team call flush(6,iostat) stop endif endif call sync_team_aa( (/me,team/), (/wait/) ) end subroutine sync_team_ss subroutine sync_team_sa(team,wait) integer, intent (in) :: team,wait(:) ! ! sync with single other image team. ! if (debug) then if (team==me) then write(6,*) 'error in sync_team - scalar team == this_image()' write(6,*) 'this_image(),team = ',me,team call flush(6,iostat) stop endif endif call sync_team_aa( (/me,team/), wait ) end subroutine sync_team_sa subroutine sync_team_a(team) integer, intent (in) :: team(:) ! ! sync with all images in team(:). ! must be no repeated elements in team(:). ! this_image() must be in team(:). ! call sync_team_aa( team, team ) end subroutine sync_team_a subroutine sync_team_as(team,wait) integer, intent (in) :: team(:),wait ! ! sync with all images in team(:). ! must be no repeated elements in team(:). ! this_image() must be in team(:). ! early return possible when image wait arrives. ! call sync_team_aa( team, (/wait/) ) end subroutine sync_team_as subroutine sync_team_aa(team,wait) integer, intent (in) :: team(:),wait(:) ! ! sync with all images in team(:). ! must be no repeated elements in team(:). ! this_image() must be in team(:). ! early return possible when all images in wait arrive. ! integer :: k integer(int64) :: mcount,spin if (debug) then if (.not.any(team==me)) then write(6,*) 'error in sync_team - this_image() not in team' write(6,*) 'this_image(),team = ',me,team call flush(6,iostat) stop endif do k= 1,size(team) if (1>team(k) .or. team(k)>nimg) then write(6,*) 'error in sync_team - out of range team element' write(6,*) 'this_image(),k,team(k) = ',me,k,team(k) call flush(6,iostat) stop endif if (count(team==team(k))/=1) then write(6,*) 'error in sync_team - repeated element in team' write(6,*) 'this_image(),team = ',me,team call flush(6,iostat) stop endif enddo do k= 1,size(wait) if (.not.any(team==wait(k))) then write(6,*) 'error in sync_team - wait not a subset of team' write(6,*) 'this_image(),k,wait(k),team = ', & me,k,wait(k),team call flush(6,iostat) stop endif if (count(wait==wait(k))/=1) then write(6,*) 'error in sync_team - repeated element in wait' write(6,*) 'this_image(),wait = ',me,wait call flush(6,iostat) stop endif enddo endif if (spin_local) then ! ! update count remotely then spin locally. ! fixed number of put operations. ! do k = 1,size(team) mcount = count_team(me)[team(k)] + 1 count_team(me)[team(k)] = mcount enddo call sync_memory() do k = 1,size(wait) mcount = count_team(me)[wait(k)] do spin = 1,max_spin call sync_memory() if (mcount.le.count_team(wait(k))) exit enddo if (spin.gt.max_spin) then write(6,*) 'error in sync_team - deadlock detected' write(6,*) 'this_image(),wait(k),team = ',me,wait(k),team write(6,*) 'this_image(),count = ', & me,mcount,count_team(wait(k)) write(6,*) 'this_image(),spin = ', & me,max_spin,spin call flush(6,iostat) stop endif enddo else ! ! update count locally then spin remotely. ! fewer remote operations (gets) in best case, but ! spinning creates large number of remote operations. ! do k = 1,size(team) mcount = count_team(team(k)) + 1 count_team(team(k)) = mcount enddo call sync_memory() do k = 1,size(wait) mcount = count_team(wait(k)) do spin = 1,max_spin call sync_memory() if (mcount.le.count_team(me)[wait(k)]) exit enddo if (spin.gt.max_spin) then write(6,*) 'error in sync_team - deadlock detected' write(6,*) 'this_image(),wait(k),team = ',me,wait(k),team write(6,*) 'this_image(),count = ', & me,mcount,count_team(wait(k)) write(6,*) 'this_image(),spin = ', & me,max_spin,spin call flush(6,iostat) stop endif enddo endif end subroutine sync_team_aa subroutine start_critical ! ! start critical region. ! at most one image between start_critical and end_critical. ! if (debug) then if (critical_count < 0) then write(6,*) 'error in start_critical - inappropriate nesting' write(6,*) 'this_image(),critical_image,critical_count = ', & me,critical_image,critical_count call flush(6,iostat) stop elseif (critical_count > 0 .and. critical_image /= me) then write(6,*) 'error in start_critical - two images active' write(6,*) 'this_image(),critical_image,critical_count = ', & me,critical_image,critical_count call flush(6,iostat) stop endif endif if (critical_count==0) then call shmem_set_lock(critical_image) endif critical_count = critical_count + 1 end subroutine start_critical subroutine end_critical ! ! end critical region. ! at most one image between start_critical and end_critical. ! if (debug) then if (critical_count < 1) then write(6,*) 'error in end_critical - inappropriate nesting' write(6,*) 'this_image(),critical_image,critical_count = ', & me,critical_image,critical_count call flush(6,iostat) stop endif endif critical_count = critical_count - 1 if (critical_count==0) then call shmem_clear_lock(critical_image) endif end subroutine end_critical end module caf_intrinsics