LLVM Openmp Interop Fortran

Table of Contents

Introduction

Moving ahead to fortran, we got llvm-openmp APIs working in C, now is time to write interface and interop modue in fortran and get it initially working with GFortran. In this blog, I’ll discuss a small working example.

Example

openmp pragma

program openmp
use omp_lib

integer :: thread_id

!$omp parallel private(thread_id)

thread_id = omp_get_thread_num()
print *, "hello from process: ", thread_id
!$omp end parallel
end program
% gfortran a.f90 -fopenmp && ./a.out
 hello from process:            1
 hello from process:            0
 hello from process:            2
 hello from process:            3
 hello from process:            5
 hello from process:            6
 hello from process:            4
 hello from process:            7

transformed to

Here, instead of kmp.h, I’ll be defining openmpinterop module that will contain all bind(c) interfaces.

openmpinterop

module openmpinterop
use, intrinsic :: iso_c_binding
interface
    subroutine kmpc_fork_call(ident, nargs, microtask) bind(c, name="__kmpc_fork_call")
    import :: c_ptr, c_funptr, c_int
    type(c_ptr), intent(in) :: ident
    integer(c_int), value :: nargs
    type(c_funptr), value :: microtask
    end subroutine kmpc_fork_call

    function omp_get_max_threads() bind(c, name="omp_get_max_threads")
    import :: c_int
    integer(c_int) :: omp_get_max_threads
    end function omp_get_max_threads

    function omp_get_thread_num() bind(c, name="omp_get_thread_num")
    import :: c_int
    integer(c_int) :: omp_get_thread_num
    end function omp_get_thread_num
end interface

type ident_t
    integer(c_int) flags
    integer(c_int) reserved_1
    integer(c_int) reserved_2
    integer(c_int) reserved_3
    character(len=23) psource
end type ident_t
end module openmpinterop

translated code

subroutine main_omp() bind(c)
use :: iso_c_binding, only: c_int
use openmpinterop
integer(c_int) :: thread_id
thread_id = omp_get_thread_num()
print *, "hello from thread:", thread_id
end subroutine main_omp

program main
use openmpinterop

implicit none

interface
    subroutine main_omp() bind(c)
    end subroutine main_omp
end interface

type(c_ptr) :: loc_ptr
type(ident_t), target :: loc_
integer(c_int) :: flags, reserved_1, reserved_2, reserved_3
character(len=23) :: psource

print *, "omp_get_max_threads(): ", omp_get_max_threads()

flags = 2
reserved_1 = 0
reserved_2 = 0
reserved_3 = 22
psource = ";unknown;unknown;0;0;;"

loc_%flags = flags
loc_%reserved_1 = reserved_1
loc_%reserved_2 = reserved_2
loc_%reserved_3 = reserved_3
loc_%psource = psource

loc_ptr = c_loc(loc_)

print *, "loc_ptr: ", loc_ptr
print *, "c_funloc(main_omp): ", c_funloc(main_omp)

! Call the __kmpc_fork_call function
call kmpc_fork_call(loc_ptr, 0, c_funloc(main_omp))

end program main
% gfortran -c openmpinterop.f90 && gfortran a-transformed.f90 /Users/pranavchiku/repos/llvm-project/openmp/build/runtime/src/libomp.dylib && ./a.out
 omp_get_max_threads():            8
 loc_ptr:            6123746520
 c_funloc(main_omp):            4343151376
 hello from thread:           3
 hello from thread:           7
 hello from thread:           6
 hello from thread:           4
 hello from thread:           2
 hello from thread:           1
 hello from thread:           5
 hello from thread:           0

Now, increasing complexity of example:

Array Assignment

openmp pragma

subroutine initialize_array(n, a, val)
use omp_lib
integer, intent(in) :: n
real, intent(in) :: val
real, dimension(n), intent(out) :: a

integer :: i
!$omp parallel do
do i = 1, n
  a(i) = val
end do
end subroutine

program openmp
implicit none
integer :: n, i
real :: val
real, dimension(1000000) :: a

n = 1000000
val = 3.14
call initialize_array(n, a, val)

do i = 1, 10
  print *, a(i)
end do

end program
% gfortran b.f90 -fopenmp && ./a.out
   3.14000010    
   3.14000010    
   3.14000010    
   3.14000010    
   3.14000010    
   3.14000010    
   3.14000010    
   3.14000010    
   3.14000010    
   3.14000010    

translated code

We will need to add more interface into openmpinterop and thus it looks like:

openmpinterop

module openmpinterop
use, intrinsic :: iso_c_binding
interface
    subroutine kmpc_fork_call_0(ident, nargs, microtask) bind(c, name="__kmpc_fork_call")
    import :: c_ptr, c_funptr, c_int
    type(c_ptr), intent(in) :: ident
    integer(c_int), value :: nargs
    type(c_funptr), value :: microtask
    end subroutine kmpc_fork_call_0

    subroutine kmpc_fork_call_3(ident, nargs, microtask, n, a, val) bind(c, name="__kmpc_fork_call")
        import :: c_ptr, c_funptr, c_int, c_float
        type(c_ptr), intent(in) :: ident
        integer(c_int), value :: nargs
        type(c_funptr), value :: microtask

        type(c_ptr) :: n, val, a
        ! integer(c_int), value :: n
        ! real(c_float), value :: val
        ! real(c_float), dimension(n), intent(out) :: a
    end subroutine kmpc_fork_call_3

    subroutine kmpc_for_static_init_4(loc, gtid, schedtype, plastiter, plower, pupper, &
        pstride, incr, chunk) bind(c, name="__kmpc_for_static_init_4")
    import :: c_ptr, c_int
    type(c_ptr), intent(in) :: loc
    integer(c_int), value :: gtid
    integer(c_int) :: schedtype, incr, chunk
    type(c_ptr), intent(inout) :: plastiter, plower, pupper, pstride
    end subroutine kmpc_for_static_init_4

    subroutine kmpc_for_static_fini(loc, global_tid) bind(c, name="__kmpc_for_static_fini")
    import :: c_ptr, c_int
    type(c_ptr), intent(in) :: loc
    integer(c_int), value :: global_tid
    end subroutine kmpc_for_static_fini

    subroutine kmpc_barrier(loc, global_tid) bind(c, name="__kmpc_barrier")
    import :: c_ptr, c_int
    type(c_ptr), intent(in) :: loc
    integer(c_int), value :: global_tid
    end subroutine kmpc_barrier

    function omp_get_max_threads() bind(c, name="omp_get_max_threads")
    import :: c_int
    integer(c_int) :: omp_get_max_threads
    end function omp_get_max_threads

    function omp_get_thread_num() bind(c, name="omp_get_thread_num")
    import :: c_int
    integer(c_int) :: omp_get_thread_num
    end function omp_get_thread_num

    function kmp_get_global_thread_id() bind(c, name="__kmp_get_global_thread_id")
    import :: c_int
    integer(c_int) :: kmp_get_global_thread_id
    end function kmp_get_global_thread_id

end interface

type ident_t
    integer(c_int) flags
    integer(c_int) reserved_1
    integer(c_int) reserved_2
    integer(c_int) reserved_3
    character(len=23) psource
end type ident_t

contains

type(c_ptr) function create_loc(flags) result(loc_ptr)
implicit none
type(ident_t), target :: loc_
integer(c_int) :: flags, reserved_1, reserved_2, reserved_3
character(len=23) :: psource

reserved_1 = 0
reserved_2 = 0
reserved_3 = 22
psource = ";unknown;unknown;0;0;;"

loc_%flags = flags
loc_%reserved_1 = reserved_1
loc_%reserved_2 = reserved_2
loc_%reserved_3 = reserved_3
loc_%psource = psource

loc_ptr = c_loc(loc_)
end function create_loc

end module openmpinterop

This example is not fully working yet, but it compiles and does something which means we are heading in right direction.

b-transformed.f90

subroutine lcompilers_initialize_array(global_tid, bound_tid, n, a, val) bind(c)
use openmpinterop

type(c_ptr), value :: global_tid, bound_tid
type(c_ptr) :: n, val, a

integer, pointer :: f_global_tid, f_bound_tid, f_tmp

type(c_ptr) :: lastiter, lower, upper, stride
integer(c_int) :: incr, chunk
integer, pointer :: t_lastier, t_lower, t_upper, t_stride

integer :: i

type(c_ptr) :: loc_ptr
loc_ptr = create_loc(514)

incr = 1;
chunk = 1;

allocate(t_lastier)
allocate(t_lower)
allocate(t_upper)
allocate(t_stride)

! t_lastier = 1
! lastiter = c_loc(t_lastier)
! t_lower = 0
! lower = c_loc(t_lower)
! t_upper = n
! upper = c_loc(t_upper)
! t_stride = 1
! stride = c_loc(t_stride)

call c_f_pointer(global_tid, f_global_tid)
call c_f_pointer(bound_tid, f_bound_tid)

! call kmpc_for_static_init_4(loc_ptr, f_global_tid, 34, lastiter, lower, upper, stride, incr, chunk)

call c_f_pointer(lower, f_tmp)
print *, "n: ", n

! call kmpc_for_static_fini(loc_ptr, f_global_tid)
! call kmpc_barrier(loc_ptr, f_global_tid)

end subroutine

subroutine initialize_array(n, a, val)
use openmpinterop
interface
subroutine lcompilers_initialize_array(global_tid, bound_tid, n, a, val) bind(c)
import :: c_ptr, c_int, c_float
type(c_ptr), value :: global_tid, bound_tid
type(c_ptr) :: n, val, a
end subroutine lcompilers_initialize_array
end interface

integer, intent(in), target :: n
real, intent(in), target :: val
real, dimension(n), target :: a

type(c_ptr) :: n_c_ptr, val_c_ptr, a_c_ptr

type(c_ptr) :: loc_ptr
loc_ptr = create_loc(2)

n_c_ptr = c_loc(n)
val_c_ptr = c_loc(val)
a_c_ptr = c_loc(a(1))

call kmpc_fork_call_3( loc_ptr, 3, c_funloc(lcompilers_initialize_array), n_c_ptr, a_c_ptr, val_c_ptr)
end subroutine


program openmp
interface
subroutine initialize_array(n, a, val)
use iso_c_binding
integer, intent(in), target :: n
real, intent(in), target :: val
real, dimension(n), target :: a
end subroutine initialize_array
end interface

integer :: n = 1000000
real :: val = 3.14
real, dimension(1000000) :: a

integer :: i

call initialize_array(n, a, val)

do i = 1, 10
    print *, a(i)
end do

end program
% gfortran -c openmpinterop.f90 && gfortran b-transformed.f90 /Users/pranavchiku/repos/llvm-project/openmp/build/runtime/src/libomp.dylib && ./a.out
 n:            6094206224
 n:            6094206224
 n:            6094206224
 n:            6094206224
 n:            6094206224
 n:            6094206224
 n:            6094206224
 n:            6094206224
   0.00000000    
   0.00000000    
   0.00000000    
   0.00000000    
   0.00000000    
   0.00000000    
   0.00000000    
   0.00000000    
   0.00000000    
   0.00000000    

The output is not yet aligning, I will continue to work on this and get it done soon. Thanks for reading this :)