! Origin: monc/pencilfft_8F90_source.html

module pencil_fft_mod

  use datadefn_mod, only : default_precision, precision_type

  use grids_mod, only : x_index, y_index, z_index, global_grid_type

  use state_mod, only : model_state_type

  use fftw_mod, only : c_double_complex, c_ptr, fftw_backward, fftw_forward, fftw_estimate, fftw_plan_many_dft_r2c, &

       fftw_plan_many_dft_c2r, fftw_execute_dft_c2r, fftw_execute_dft_r2c, fftw_destroy_plan

  use mpi, only : mpi_double_complex, mpi_int, mpi_comm_self

  implicit none


#ifndef TEST_MODE

  private

#endif


  ! Describes one pencil layout together with the message pattern needed to reach it from the previous layout.
  type pencil_transposition

     ! my_pencil_size: local extent of this process' pencil in each of the three dimensions
     ! process_decomposition_layout: number of processes along each dimension for this pencil layout
     ! my_process_location: this process' location in that layout
     ! dim: the dimension this pencil runs along (set from new_pencil_dim when the transposition is created)
     integer :: my_pencil_size(3), process_decomposition_layout(3), my_process_location(3), dim

     ! Per-partner element counts and displacements, passed as the count/displacement arguments of mpi_alltoallv
     integer, dimension(:), allocatable :: send_sizes, send_offsets, recv_sizes, recv_offsets

     ! Per-partner three dimensional block extents (3 x number of partners); their products give send/recv sizes
     integer, dimension(:,:), allocatable :: recv_dims, send_dims

  end type pencil_transposition


  ! Direction flags handed to create_transposition, transpose_to_pencil and contiguise_data
  integer, parameter :: forward=1, backward=2

  ! Communicators used for the transpositions involving the Y and X dimensions respectively; set up in
  ! initialise_pencil_fft and released in finalise_pencil_fft
  integer :: dim_y_comm, dim_x_comm

  ! Transpositions from one pencil to another

  ! The first four are used by the forward 3D FFT, the "_2" variants by the backwards 3D FFT
  type(pencil_transposition) :: y_from_z_transposition, x_from_y_transposition, y_from_x_transposition, z_from_y_transposition, &

       y_from_z_2_transposition, x_from_y_2_transposition, y_from_x_2_transposition, z_from_y_2_transposition


  ! Temporary buffers used in transposition

  ! All buffers are allocated in initialise_buffers and deallocated in finalise_pencil_fft
  real(kind=default_precision), dimension(:,:,:), contiguous, pointer :: real_buffer1, real_buffer2, real_buffer3, &

       fft_in_y_buffer , fft_in_x_buffer

  complex(C_DOUBLE_COMPLEX), dimension(:,:,:), contiguous, pointer :: buffer1, buffer2


  ! Pointers to FFTW plans and whether these have been initialised (only initialised once)

  ! Plan ids used in this module: 1 = forward FFT in Y, 2 = backward FFT in X, 3 = forward FFT in X, 4 = backward FFT in Y
  type(c_ptr) :: fftw_plan(4)

  logical :: fftw_plan_initialised(4)=.false.


  ! Public entry points of the module
  public initialise_pencil_fft, finalise_pencil_fft, perform_forward_3dfft, perform_backwards_3dfft

contains


  ! Initialises the pencil FFT: works out the communicators used for the Y and X transpositions, gathers the
  ! local Y and X sizes of the processes in each of them, builds every transposition description and allocates
  ! the work buffers.
  !   current_state  the current model state (parallel layout, local and global grids)
  !   my_y_start     set to this process' global start index in Y (see deduce_my_global_start)
  !   my_x_start     set to this process' global start index in X (see deduce_my_global_start)
  ! Returns the pencil size held by z_from_y_transposition, i.e. the local size after the forward transform.
  function initialise_pencil_fft(current_state, my_y_start, my_x_start)
    type(model_state_type), intent(inout) :: current_state
    integer, intent(out) :: my_y_start, my_x_start
    integer :: initialise_pencil_fft(3)

    integer :: ierr, y_distinct_sizes(current_state%parallel%dim_sizes(y_index)), &
         x_distinct_sizes(current_state%parallel%dim_sizes(x_index))

    my_y_start=deduce_my_global_start(current_state, y_index)
    my_x_start=deduce_my_global_start(current_state, x_index)

    ! NOTE(review): mpi_int is the C datatype handle; it is used here for default integers - confirm this
    ! matches the integer size of the build
    if (current_state%parallel%dim_sizes(y_index) .gt. 1 .and. current_state%parallel%dim_sizes(x_index) .gt. 1) then
      ! Decomposed in both dimensions, so split the Cartesian communicator into one sub-communicator per dimension.
      ! The remain_dims argument of mpi_cart_sub is a LOGICAL array; the call has an implicit interface so integer
      ! values would not be diagnosed by the compiler and only work when they happen to share the bit pattern
      call mpi_cart_sub(current_state%parallel%neighbour_comm, (/.true., .false./), dim_y_comm, ierr)
      call mpi_cart_sub(current_state%parallel%neighbour_comm, (/.false., .true./), dim_x_comm, ierr)

      call mpi_allgather(current_state%local_grid%size(y_index), 1, mpi_int, y_distinct_sizes, 1, mpi_int, dim_y_comm, ierr)
      call mpi_allgather(current_state%local_grid%size(x_index), 1, mpi_int, x_distinct_sizes, 1, mpi_int, dim_x_comm, ierr)
    else if (current_state%parallel%dim_sizes(y_index) .gt. 1) then
      ! Decomposed in Y only: every process takes part in the Y transposition, X stays local
      dim_y_comm=current_state%parallel%monc_communicator
      dim_x_comm=mpi_comm_self
      call mpi_allgather(current_state%local_grid%size(y_index), 1, mpi_int, y_distinct_sizes, 1, mpi_int, dim_y_comm, ierr)
      x_distinct_sizes=current_state%local_grid%size(x_index)
    else if (current_state%parallel%dim_sizes(x_index) .gt. 1) then
      ! Decomposed in X only: every process takes part in the X transposition, Y stays local
      dim_y_comm=mpi_comm_self
      dim_x_comm=current_state%parallel%monc_communicator
      y_distinct_sizes=current_state%local_grid%size(y_index)
      call mpi_allgather(current_state%local_grid%size(x_index), 1, mpi_int, x_distinct_sizes, 1, mpi_int, dim_x_comm, ierr)
    else
      ! No decomposition in either dimension, everything is local
      dim_y_comm=mpi_comm_self
      dim_x_comm=mpi_comm_self
      y_distinct_sizes=current_state%local_grid%size(y_index)
      x_distinct_sizes=current_state%local_grid%size(x_index)
    end if

    call initialise_transpositions(current_state, y_distinct_sizes, x_distinct_sizes)
    call initialise_buffers()

    initialise_pencil_fft=z_from_y_transposition%my_pencil_size
  end function initialise_pencil_fft


  ! Releases everything owned by the pencil FFT: the FFTW plans that were created, the sub-communicators created
  ! by initialise_pencil_fft and the work buffers.
  !   monc_communicator  the model wide communicator, which must never be freed here
  subroutine finalise_pencil_fft(monc_communicator)
    integer, intent(in) :: monc_communicator

    integer :: ierr, i

    do i=1,size(fftw_plan_initialised)
      if (fftw_plan_initialised(i)) then
        call fftw_destroy_plan(fftw_plan(i))
        ! Mark the plan as gone so that a later re-initialisation builds a fresh plan rather than executing
        ! the destroyed one
        fftw_plan_initialised(i)=.false.
      end if
    end do

    ! Only communicators created via mpi_cart_sub are ours to free
    if (dim_y_comm .ne. mpi_comm_self .and. dim_y_comm .ne. monc_communicator) call mpi_comm_free(dim_y_comm, ierr)
    if (dim_x_comm .ne. mpi_comm_self .and. dim_x_comm .ne. monc_communicator) call mpi_comm_free(dim_x_comm, ierr)
    deallocate(buffer1, buffer2, real_buffer1, real_buffer2, real_buffer3, fft_in_y_buffer , fft_in_x_buffer)
  end subroutine finalise_pencil_fft


  ! Forward transform: FFT in Y, then FFT in X (each scaled by the global size of that dimension), followed by
  ! the two transpositions that bring the result back through Y pencils into Z pencils.
  !   current_state  the current model state
  !   source_data    real field in Z pencils
  !   target_data    receives the transformed field in Z pencils
  subroutine perform_forward_3dfft(current_state, source_data, target_data)
    type(model_state_type), target, intent(inout) :: current_state
    real(kind=default_precision), dimension(:,:,:), intent(inout) :: source_data
    real(kind=default_precision), dimension(:,:,:), intent(out) :: target_data

    ! Transform in Y and normalise by the number of global points in Y
    call transpose_and_forward_fft_in_y(current_state, source_data, buffer1, real_buffer1)
    real_buffer1=real_buffer1/current_state%global_grid%size(y_index)

    ! Transform in X and normalise by the number of global points in X
    call transpose_and_forward_fft_in_x(current_state, real_buffer1, buffer2, real_buffer2)
    real_buffer2=real_buffer2/current_state%global_grid%size(x_index)

    ! X pencil -> Y pencil -> Z pencil
    call transpose_to_pencil(y_from_x_transposition, [x_index, z_index, y_index], dim_x_comm, backward, &
         real_buffer2, real_buffer3)
    call transpose_to_pencil(z_from_y_transposition, [y_index, x_index, z_index], dim_y_comm, backward, &
         real_buffer3, target_data)
  end subroutine perform_forward_3dfft


  ! Backwards transform: moves the spectral data from Z pencils through Y pencils into X pencils, then performs
  ! the inverse FFT in X followed by the inverse FFT in Y, which also returns the data to Z pencils.
  !   current_state  the current model state
  !   source_data    spectral field in Z pencils
  !   target_data    receives the real field in Z pencils
  subroutine perform_backwards_3dfft(current_state, source_data, target_data)
    type(model_state_type), target, intent(inout) :: current_state
    real(kind=default_precision), dimension(:,:,:), intent(in) :: source_data
    real(kind=default_precision), dimension(:,:,:), intent(out) :: target_data

    ! Z pencil -> Y pencil -> X pencil
    call transpose_to_pencil(y_from_z_2_transposition, [z_index, y_index, x_index], dim_y_comm, forward, &
         source_data, real_buffer3)
    call transpose_to_pencil(x_from_y_2_transposition, [y_index, x_index, z_index], dim_x_comm, forward, &
         real_buffer3, real_buffer2)

    ! Inverse transforms, X first and then Y
    call transpose_and_backward_fft_in_x(current_state, real_buffer2, buffer2, real_buffer1)
    call transpose_and_backward_fft_in_y(current_state, real_buffer1, buffer1, target_data)
  end subroutine perform_backwards_3dfft


  ! Allocates the module level work buffers from the pencil sizes of the already created transpositions.
  ! The complex buffers hold n/2+1 entries along the transformed dimension and the matching real buffers
  ! twice that number, so each complex value can be stored as a (real, imaginary) pair.
  subroutine initialise_buffers()
    associate (y_pencil => y_from_z_transposition%my_pencil_size, &
         x_pencil => x_from_y_transposition%my_pencil_size, &
         y_return_pencil => y_from_x_transposition%my_pencil_size)
      ! Buffers for the transform in Y
      allocate(fft_in_y_buffer(y_pencil(y_index), y_pencil(x_index), y_pencil(z_index)))
      allocate(buffer1(y_pencil(y_index)/2+1, y_pencil(x_index), y_pencil(z_index)))
      allocate(real_buffer1((y_pencil(y_index)/2+1)*2, y_pencil(x_index), y_pencil(z_index)))

      ! Buffers for the transform in X
      allocate(fft_in_x_buffer(x_pencil(x_index), x_pencil(z_index), x_pencil(y_index)))
      allocate(buffer2(x_pencil(x_index)/2+1, x_pencil(z_index), x_pencil(y_index)))
      allocate(real_buffer2((x_pencil(x_index)/2+1)*2, x_pencil(z_index), x_pencil(y_index)))

      ! Intermediate Y pencil used when transposing between X and Z pencils
      allocate(real_buffer3(y_return_pencil(y_index), y_return_pencil(x_index), y_return_pencil(z_index)))
    end associate
  end subroutine initialise_buffers


  ! Builds the eight transposition descriptions. Each one is created from the previous one, so the order of the
  ! calls below matters: the first four describe the forward 3D FFT path, the "_2" set the backwards path.
  !   current_state     the current model state
  !   y_distinct_sizes  local Y size of every process taking part in the Y transposition
  !   x_distinct_sizes  local X size of every process taking part in the X transposition
  subroutine initialise_transpositions(current_state, y_distinct_sizes, x_distinct_sizes)
    type(model_state_type), intent(inout) :: current_state
    integer, dimension(:), intent(in) :: y_distinct_sizes, x_distinct_sizes

    type(pencil_transposition) :: z_pencil
    integer :: extended_y_sizes(size(y_distinct_sizes)), extended_x_sizes(size(x_distinct_sizes))

    ! Per process sizes once the dimension has been extended to hold the complex data as pairs of reals
    extended_y_sizes=normal_to_extended_process_dim_sizes(y_distinct_sizes)
    extended_x_sizes=normal_to_extended_process_dim_sizes(x_distinct_sizes)

    z_pencil=create_initial_transposition_description(current_state)

    ! Forward path: Z -> Y -> X -> Y -> Z
    y_from_z_transposition=create_transposition(current_state%global_grid, z_pencil, y_index, &
         y_distinct_sizes, forward, [-1])
    x_from_y_transposition=create_transposition(current_state%global_grid, y_from_z_transposition, x_index, &
         x_distinct_sizes, forward, [y_index])
    y_from_x_transposition=create_transposition(current_state%global_grid, x_from_y_transposition, y_index, &
         extended_x_sizes, backward, [y_index, x_index])
    z_from_y_transposition=create_transposition(current_state%global_grid, y_from_x_transposition, z_index, &
         extended_y_sizes, backward, [y_index, x_index])

    ! Backwards path: Z -> Y -> X -> Y -> Z
    y_from_z_2_transposition=create_transposition(current_state%global_grid, z_from_y_transposition, y_index, &
         extended_y_sizes, forward, [y_index, x_index])
    x_from_y_2_transposition=create_transposition(current_state%global_grid, y_from_z_2_transposition, x_index, &
         extended_x_sizes, forward, [y_index, x_index])
    y_from_x_2_transposition=create_transposition(current_state%global_grid, x_from_y_2_transposition, y_index, &
         x_distinct_sizes, backward, [y_index])
    z_from_y_2_transposition=create_transposition(current_state%global_grid, y_from_x_2_transposition, z_index, &
         y_distinct_sizes, backward, [-1])
  end subroutine initialise_transpositions


  ! Creates the description of a new pencil layout from an existing one, including the per partner block
  ! extents, message sizes and offsets needed to move the data between the two.
  !   global_grid             the global grid
  !   existing_transposition  the layout the data is currently held in
  !   new_pencil_dim          the dimension the new pencil runs along
  !   process_dim_sizes       sizes, one per partner process, along the dimension being exchanged
  !   direction               forward or backward
  !   extended_dimensions     dimensions whose size is extended to hold complex values as pairs of reals
  function create_transposition(global_grid, existing_transposition, new_pencil_dim,&
       process_dim_sizes, direction, extended_dimensions) result(new_transposition)
    type(global_grid_type), intent(inout) :: global_grid
    type(pencil_transposition), intent(in) :: existing_transposition
    integer, dimension(:), intent(in) :: process_dim_sizes
    integer, intent(in) :: new_pencil_dim, direction, extended_dimensions(:)
    type(pencil_transposition) :: new_transposition

    integer :: old_pencil_dim, number_partners

    old_pencil_dim=existing_transposition%dim
    new_transposition%dim=new_pencil_dim

    ! Process layout, my location in it and my local size for the new pencil
    new_transposition%process_decomposition_layout=determine_pencil_process_dimensions(new_pencil_dim, &
         old_pencil_dim, existing_transposition%process_decomposition_layout)
    new_transposition%my_process_location=determine_my_pencil_location(new_pencil_dim, old_pencil_dim, &
         existing_transposition%my_process_location)
    new_transposition%my_pencil_size=determine_pencil_size(new_pencil_dim, &
         new_transposition%process_decomposition_layout, new_transposition%my_process_location, &
         existing_transposition, global_grid, extended_dimensions)

    ! One block is exchanged with every process along the dimension of the existing pencil
    number_partners=new_transposition%process_decomposition_layout(old_pencil_dim)
    allocate(new_transposition%send_dims(3, number_partners), new_transposition%recv_dims(3, number_partners))
    allocate(new_transposition%send_sizes(number_partners), new_transposition%send_offsets(number_partners), &
         new_transposition%recv_sizes(number_partners), new_transposition%recv_offsets(number_partners))

    select case (direction)
    case (forward)
      call determine_my_process_sizes_per_dim(old_pencil_dim, existing_transposition%my_pencil_size, &
           new_transposition%process_decomposition_layout, global_grid, extended_dimensions, &
           new_transposition%send_dims)
      call determine_matching_process_dimensions(new_pencil_dim, old_pencil_dim, process_dim_sizes, &
           new_transposition%my_pencil_size, new_transposition%process_decomposition_layout, &
           new_transposition%recv_dims)
    case default
      call determine_my_process_sizes_per_dim(new_pencil_dim, new_transposition%my_pencil_size, &
           existing_transposition%process_decomposition_layout, global_grid, extended_dimensions, &
           new_transposition%recv_dims)
      call determine_matching_process_dimensions(old_pencil_dim, new_pencil_dim, process_dim_sizes, &
           existing_transposition%my_pencil_size, existing_transposition%process_decomposition_layout, &
           new_transposition%send_dims)
    end select

    ! Flatten the block extents into message sizes and derive the displacements from them
    call concatenate_dimension_sizes(new_transposition%send_dims, new_transposition%send_sizes)
    call determine_offsets_from_size(new_transposition%send_sizes, new_transposition%send_offsets)
    call concatenate_dimension_sizes(new_transposition%recv_dims, new_transposition%recv_sizes)
    call determine_offsets_from_size(new_transposition%recv_sizes, new_transposition%recv_offsets)
  end function create_transposition


  ! Transposes Z pencil data into Y pencils, performs the real to complex FFT along Y (plan 1) and unpacks the
  ! complex result into pairs of reals.
  !   current_state  the current model state (not referenced in this routine)
  !   source_data    real field held in Z pencils
  !   buffer         complex work array receiving the FFT output, must already be associated
  !   real_buffer    receives the transformed data as (real, imaginary) pairs along the first dimension
  subroutine transpose_and_forward_fft_in_y(current_state, source_data, buffer, real_buffer)
    type(model_state_type), target, intent(inout) :: current_state
    real(kind=default_precision), dimension(:,:,:), intent(inout) :: source_data
    real(kind=default_precision), dimension(:,:,:),  intent(out) :: real_buffer
    ! intent(inout) rather than intent(out): the association status of an intent(out) pointer dummy is undefined
    ! on entry, which would make every use of the caller's already allocated buffer non-conforming
    complex(C_DOUBLE_COMPLEX), dimension(:,:,:),  contiguous, pointer, intent(inout) :: buffer

    ! Transpose globally from Z pencil to Y pencil
    call transpose_to_pencil(y_from_z_transposition, (/z_index, y_index, x_index/), dim_y_comm, forward, &
       source_data, fft_in_y_buffer)

    ! One transform of length Y per (x, z) row
    call perform_r2c_fft(fft_in_y_buffer, buffer, y_from_z_transposition%my_pencil_size(y_index), &
         y_from_z_transposition%my_pencil_size(x_index) * y_from_z_transposition%my_pencil_size(z_index), 1)
    call convert_complex_to_real(buffer, real_buffer)
  end subroutine transpose_and_forward_fft_in_y


  ! Packs the (real, imaginary) pairs back into complex values, performs the complex to real FFT along X (plan 2)
  ! and transposes the result from X pencils to Y pencils.
  !   current_state  the current model state (not referenced in this routine)
  !   source_data    spectral data in X pencils, stored as (real, imaginary) pairs along the first dimension
  !   buffer         complex work array used as the FFT input, must already be associated
  !   real_buffer    receives the data in Y pencils
  subroutine transpose_and_backward_fft_in_x(current_state, source_data, buffer, real_buffer)
    type(model_state_type), target, intent(inout) :: current_state
    real(kind=default_precision), dimension(:,:,:), intent(inout) :: source_data
    real(kind=default_precision), dimension(:,:,:),  intent(out) :: real_buffer
    ! intent(inout) rather than intent(out): the association status of an intent(out) pointer dummy is undefined
    ! on entry, which would make every use of the caller's already allocated buffer non-conforming
    complex(C_DOUBLE_COMPLEX), dimension(:,:,:), contiguous, pointer, intent(inout) :: buffer

    call convert_real_to_complex(source_data, buffer)

    ! The transform length is the leading extent of the real output buffer. Deriving it as the extended size
    ! minus two is only right for even sizes, as the extended size is (n/2+1)*2 which is n+1 when n is odd
    call perform_c2r_fft(buffer, fft_in_x_buffer, size(fft_in_x_buffer, 1), &
         x_from_y_2_transposition%my_pencil_size(y_index) * x_from_y_2_transposition%my_pencil_size(z_index), 2)

    ! Transpose globally from X pencil to Y pencil
    call transpose_to_pencil(y_from_x_2_transposition, (/x_index, z_index, y_index/), dim_x_comm, backward, &
       fft_in_x_buffer, real_buffer)
  end subroutine transpose_and_backward_fft_in_x


  ! Transposes Y pencil data into X pencils, performs the real to complex FFT along X (plan 3) and unpacks the
  ! complex result into pairs of reals. Note that the dummy names hide the module buffers of the same name.
  !   current_state  the current model state (not referenced in this routine)
  !   buffer1        real data held in Y pencils
  !   buffer2        complex work array receiving the FFT output, must already be associated
  !   buffer3        receives the transformed data as (real, imaginary) pairs along the first dimension
  subroutine transpose_and_forward_fft_in_x(current_state, buffer1, buffer2, buffer3)
    type(model_state_type), target, intent(inout) :: current_state
    ! intent(inout) rather than intent(out): the association status of an intent(out) pointer dummy is undefined
    ! on entry, which would make every use of the caller's already allocated buffer non-conforming
    complex(C_DOUBLE_COMPLEX), dimension(:,:,:),  contiguous, pointer, intent(inout) :: buffer2
    real(kind=default_precision), dimension(:,:,:), intent(inout) :: buffer1, buffer3

    ! Go from global Y pencil to global X pencil
    call transpose_to_pencil(x_from_y_transposition, (/y_index, x_index, z_index/), dim_x_comm, forward, &
       buffer1, fft_in_x_buffer)

    ! One transform of length X per (z, y) row
    call perform_r2c_fft(fft_in_x_buffer, buffer2, x_from_y_transposition%my_pencil_size(x_index), &
         x_from_y_transposition%my_pencil_size(y_index) * x_from_y_transposition%my_pencil_size(z_index), 3)

    call convert_complex_to_real(buffer2, buffer3)
  end subroutine transpose_and_forward_fft_in_x


  ! Packs the (real, imaginary) pairs back into complex values, performs the complex to real FFT along Y (plan 4)
  ! and transposes the result from Y pencils to Z pencils.
  !   current_state  the current model state (not referenced in this routine)
  !   source_data    spectral data in Y pencils, stored as (real, imaginary) pairs along the first dimension
  !   buffer         complex work array used as the FFT input, must already be associated
  !   real_buffer    receives the data in Z pencils
  subroutine transpose_and_backward_fft_in_y(current_state, source_data, buffer, real_buffer)
    type(model_state_type), target, intent(inout) :: current_state
    real(kind=default_precision), dimension(:,:,:), intent(inout) :: source_data
    real(kind=default_precision), dimension(:,:,:),  intent(out) :: real_buffer
    ! intent(inout) rather than intent(out): the association status of an intent(out) pointer dummy is undefined
    ! on entry, which would make every use of the caller's already allocated buffer non-conforming
    complex(C_DOUBLE_COMPLEX), dimension(:,:,:), contiguous, pointer, intent(inout) :: buffer

    call convert_real_to_complex(source_data, buffer)

    ! The transform length is the leading extent of the real output buffer. Deriving it as the extended size
    ! minus two is only right for even sizes, as the extended size is (n/2+1)*2 which is n+1 when n is odd
    call perform_c2r_fft(buffer, fft_in_y_buffer, size(fft_in_y_buffer, 1), &
         y_from_x_2_transposition%my_pencil_size(x_index) * y_from_x_2_transposition%my_pencil_size(z_index), 4)

    ! Go from global Y pencil to global Z pencil
    call transpose_to_pencil(z_from_y_2_transposition, (/y_index, x_index, z_index/), dim_y_comm, backward, &
       fft_in_y_buffer, real_buffer)
  end subroutine transpose_and_backward_fft_in_y


  ! Moves data from one pencil layout to another: the local data is reordered for sending, exchanged with
  ! mpi_alltoallv and the received blocks are then placed contiguously into the target array.
  !   transposition_description  sizes and offsets describing the exchange
  !   source_dims                the dimension indices of the source data, in storage order
  !   communicator               communicator the exchange is performed over
  !   direction                  forward or backward
  !   source_data                data in the existing pencil layout
  !   target_data                receives the data in the new pencil layout
  subroutine transpose_to_pencil(transposition_description, source_dims, communicator, direction, source_data, target_data)
    type(pencil_transposition), intent(in) :: transposition_description
    integer, intent(in) :: source_dims(3), communicator, direction
    real(kind=default_precision), dimension(:,:,:), intent(in) :: source_data
    real(kind=default_precision), dimension(:,:,:), intent(out) :: target_data

    integer :: ierr
    real(kind=default_precision), dimension(:,:,:), allocatable :: send_buffer
    real(kind=default_precision), dimension(:), allocatable :: recv_buffer

    ! The send buffer has the first and third dimensions of the source swapped
    allocate(send_buffer(size(source_data,3), size(source_data,2), size(source_data,1)))
    allocate(recv_buffer(product(transposition_description%my_pencil_size)+1))

    call rearrange_data_for_sending(real_source=source_data, real_target=send_buffer)

    call mpi_alltoallv(send_buffer, transposition_description%send_sizes, transposition_description%send_offsets, &
         precision_type, recv_buffer, transposition_description%recv_sizes, transposition_description%recv_offsets, &
         precision_type, communicator, ierr)

    ! The received data is described by the source dimensions in reverse order
    call contiguise_data(transposition_description, source_dims(3:1:-1), direction, &
         source_real_buffer=recv_buffer, target_real_buffer=target_data)

    deallocate(send_buffer, recv_buffer)
  end subroutine transpose_to_pencil


  ! Copies the blocks received from each partner out of the flat receive buffer into the three dimensional
  ! target array, placing the blocks one after another along the first dimension of the target.
  ! Each block is stored as cba and is written as bca (forward) or cab (backwards).
  !   transposition_description  provides the extents (recv_dims) and sizes (recv_sizes) of every block
  !   source_dims                dimension indices used to look up the c, b and a extents in recv_dims
  !   direction                  forward or backward
  !   source_real_buffer         flat buffer holding all received blocks back to back
  !   target_real_buffer         receives the reordered data
  subroutine contiguise_data(transposition_description, source_dims, direction, source_real_buffer, target_real_buffer)
    integer, intent(in) :: source_dims(3), direction
    type(pencil_transposition), intent(in) :: transposition_description
    real(kind=default_precision), dimension(:), intent(in) :: source_real_buffer
    real(kind=default_precision), dimension(:,:,:), intent(out) :: target_real_buffer

    integer :: blk, a, b, c, extent_a, extent_b, extent_c, leading_dim, leading_offset, block_start, flat_index

    ! The dimension that ends up first in the target determines how far each block shifts the next one
    if (direction == forward) then
      leading_dim=source_dims(2)
    else
      leading_dim=source_dims(1)
    end if

    leading_offset=0
    block_start=0
    do blk=1, size(transposition_description%recv_sizes)
      extent_c=transposition_description%recv_dims(source_dims(1), blk)
      extent_b=transposition_description%recv_dims(source_dims(2), blk)
      extent_a=transposition_description%recv_dims(source_dims(3), blk)

      do a=1, extent_a
        do c=1, extent_c
          do b=1, extent_b
            ! Within a block c varies fastest, then b, then a
            flat_index=block_start+(a-1)*extent_c*extent_b+(b-1)*extent_c+c
            if (direction == forward) then
              target_real_buffer(leading_offset+b, c, a)=source_real_buffer(flat_index) ! bca
            else
              target_real_buffer(leading_offset+c, a, b)=source_real_buffer(flat_index) ! cab
            end if
          end do
        end do
      end do

      ! Advance past this block, both in the target and in the flat buffer
      leading_offset=leading_offset+transposition_description%recv_dims(leading_dim, blk)
      block_start=block_start+transposition_description%recv_sizes(blk)
    end do
  end subroutine contiguise_data


  ! Performs a batch of one dimensional real to complex FFTs. The FFTW plan is created on the first call for a
  ! given plan id and reused on every later call.
  !   source_data       real input, num_rows rows of row_size values each
  !   transformed_data  complex output, row_size/2+1 values per row
  !   row_size          length of each transform
  !   num_rows          number of transforms in the batch
  !   plan_id           index into the module level plan array
  subroutine perform_r2c_fft(source_data, transformed_data, row_size, num_rows, plan_id)
    real(kind=default_precision), dimension(:,:,:), contiguous, pointer, intent(inout) :: source_data
    complex(C_DOUBLE_COMPLEX), dimension(:,:,:), contiguous, pointer, intent(inout) :: transformed_data
    integer, intent(in) :: row_size, num_rows, plan_id

    integer :: transform_length(1), complex_row_size

    if (.not. fftw_plan_initialised(plan_id)) then
      transform_length(1)=row_size
      complex_row_size=row_size/2+1
      ! Unit stride within a row; consecutive rows are row_size apart on input and row_size/2+1 apart on output
      fftw_plan(plan_id) = fftw_plan_many_dft_r2c(1, transform_length, num_rows, source_data, transform_length, 1, &
           row_size, transformed_data, transform_length, 1, complex_row_size, fftw_estimate)
      fftw_plan_initialised(plan_id)=.true.
    end if

    call fftw_execute_dft_r2c(fftw_plan(plan_id), source_data, transformed_data)
  end subroutine perform_r2c_fft


  ! Performs a batch of one dimensional complex to real FFTs. The FFTW plan is created on the first call for a
  ! given plan id and reused on every later call.
  !   source_data       complex input, row_size/2+1 values per row
  !   transformed_data  real output, num_rows rows of row_size values each
  !   row_size          length of each transform in real (not complex) coordinates
  !   num_rows          number of transforms in the batch
  !   plan_id           index into the module level plan array
  subroutine perform_c2r_fft(source_data, transformed_data, row_size, num_rows, plan_id)
    complex(C_DOUBLE_COMPLEX), dimension(:,:,:), contiguous, pointer, intent(inout) :: source_data
    real(kind=default_precision), dimension(:,:,:), contiguous, pointer, intent(inout) :: transformed_data
    integer, intent(in) :: row_size, num_rows, plan_id

    integer :: transform_length(1), complex_length(1)

    if (.not. fftw_plan_initialised(plan_id)) then
      transform_length(1)=row_size
      complex_length(1)=row_size/2+1
      ! Unit stride within a row; consecutive rows are row_size/2+1 apart on input and row_size apart on output
      fftw_plan(plan_id) = fftw_plan_many_dft_c2r(1, transform_length, num_rows, source_data, complex_length, 1, &
           complex_length(1), transformed_data, transform_length, 1, row_size, fftw_estimate)
      fftw_plan_initialised(plan_id)=.true.
    end if

    call fftw_execute_dft_c2r(fftw_plan(plan_id), source_data, transformed_data)
  end subroutine perform_c2r_fft


  ! Reorders the data ready for sending by swapping the first and third dimensions, so that
  ! real_target(k, j, i) holds real_source(i, j, k).
  !   real_source  the data to reorder
  !   real_target  receives the reordered data, its shape is that of real_source with dimensions 1 and 3 swapped
  subroutine rearrange_data_for_sending(real_source, real_target)
    real(kind=default_precision), dimension(:,:,:), intent(in) :: real_source
    real(kind=default_precision), dimension(:,:,:), intent(out) :: real_target

    integer :: first, middle, last

    do middle=1, size(real_source,2)
      do last=1, size(real_source,3)
        do first=1, size(real_source,1)
          real_target(last, middle, first)=real_source(first, middle, last)
        end do
      end do
    end do
  end subroutine rearrange_data_for_sending


  ! Determines the extents of the block exchanged with each partner process. Along the existing pencil dimension
  ! the (possibly extended) global size is split as evenly as possible, the first "remainder" partners getting one
  ! extra element; in the other two dimensions every block has the local pencil size.
  !   existing_pencil_dim       the dimension that is split between the partner processes
  !   existing_pencil_size      local pencil size, used for the dimensions that are not split
  !   new_pencil_procs_per_dim  number of processes per dimension
  !   global_grid               the global grid, provides the global size of the split dimension
  !   extended_dimensions       dimensions whose size is extended to hold complex values as pairs of reals
  !   specific_sizes_per_dim    receives the extents, indexed (dimension, partner)
  subroutine determine_my_process_sizes_per_dim(existing_pencil_dim, existing_pencil_size, new_pencil_procs_per_dim, &
       global_grid, extended_dimensions, specific_sizes_per_dim)
    integer, intent(in) :: existing_pencil_dim, existing_pencil_size(:), new_pencil_procs_per_dim(:), extended_dimensions(:)
    type(global_grid_type), intent(inout) :: global_grid
    integer, dimension(:,:), intent(inout) :: specific_sizes_per_dim

    integer :: i, split_size, split_remainder, j, s

    do i=1,3
      if (i == existing_pencil_dim) then
        s=global_grid%size(i)
        ! Use the same extended size, (s/2+1)*2, as determine_pencil_size, normal_to_extended_process_dim_sizes
        ! and deduce_my_global_start. Adding two only agrees with that for even sizes
        if (is_extended_dimension(i, extended_dimensions)) s=(s/2+1)*2
        split_size = s / new_pencil_procs_per_dim(i)
        split_remainder = s - split_size * new_pencil_procs_per_dim(i)
        do j=1,new_pencil_procs_per_dim(existing_pencil_dim)
          specific_sizes_per_dim(i,j)=merge(split_size+1, split_size, j .le. split_remainder)
        end do
      else
        specific_sizes_per_dim(i,:) = existing_pencil_size(i)
      end if
    end do
  end subroutine determine_my_process_sizes_per_dim


  ! Turns a list of block sizes into displacements: the first offset is zero and every later offset is the sum
  ! of all the sizes before it.
  !   source_sizes        the size of each block
  !   determined_offsets  receives the starting offset of each block
  subroutine determine_offsets_from_size(source_sizes, determined_offsets)
    integer, intent(in) :: source_sizes(:)
    integer, dimension(:), intent(inout) :: determined_offsets

    integer :: idx, running_total

    running_total=0
    determined_offsets(1)=running_total
    do idx=2,size(source_sizes)
      running_total=running_total+source_sizes(idx-1)
      determined_offsets(idx)=running_total
    end do
  end subroutine determine_offsets_from_size


  ! Determines the number of processes in each dimension for a new pencil layout: a single process along the
  ! new pencil dimension, the process count that the new dimension used to have along the existing pencil
  ! dimension, and an unchanged count in the remaining dimension.
  !   new_pencil_dim         dimension of the new pencil
  !   existing_pencil_dim    dimension of the existing pencil
  !   existing_pencil_procs  processes per dimension of the existing layout
  function determine_pencil_process_dimensions(new_pencil_dim, existing_pencil_dim, existing_pencil_procs)
    integer, intent(in) :: new_pencil_dim, existing_pencil_dim, existing_pencil_procs(3)
    integer :: determine_pencil_process_dimensions(3)

    ! Start from the existing layout and then overwrite the two dimensions that change. The new pencil
    ! dimension is written last so that it wins should both dimensions be the same
    determine_pencil_process_dimensions=existing_pencil_procs
    if (existing_pencil_dim .ge. 1 .and. existing_pencil_dim .le. 3) then
      determine_pencil_process_dimensions(existing_pencil_dim)=existing_pencil_procs(new_pencil_dim)
    end if
    if (new_pencil_dim .ge. 1 .and. new_pencil_dim .le. 3) determine_pencil_process_dimensions(new_pencil_dim)=1
  end function determine_pencil_process_dimensions


  ! Determines my location in a new pencil layout: one along the new pencil dimension, the location I used to
  ! have in the new dimension along the existing pencil dimension, and an unchanged location in the remaining
  ! dimension.
  !   new_pencil_dim       dimension of the new pencil
  !   existing_pencil_dim  dimension of the existing pencil
  !   existing_locations   my location in the existing layout
  function determine_my_pencil_location(new_pencil_dim, existing_pencil_dim, existing_locations)
    integer, intent(in) :: new_pencil_dim, existing_pencil_dim, existing_locations(3)
    integer :: determine_my_pencil_location(3)

    ! Start from the existing location and then overwrite the two dimensions that change. The new pencil
    ! dimension is written last so that it wins should both dimensions be the same
    determine_my_pencil_location=existing_locations
    if (existing_pencil_dim .ge. 1 .and. existing_pencil_dim .le. 3) then
      determine_my_pencil_location(existing_pencil_dim)=existing_locations(new_pencil_dim)
    end if
    if (new_pencil_dim .ge. 1 .and. new_pencil_dim .le. 3) determine_my_pencil_location(new_pencil_dim)=1
  end function determine_my_pencil_location


  ! Flattens per block extents into per block element counts by multiplying the extents of each block together.
  !   dims                    extents, indexed (dimension, block)
  !   concatenated_dim_sizes  receives the number of elements in each block
  subroutine concatenate_dimension_sizes(dims, concatenated_dim_sizes)
    integer, dimension(:,:), intent(in) :: dims
    integer, dimension(:), intent(inout) :: concatenated_dim_sizes

    ! Reduce over the first (dimension) index, giving one product per block
    concatenated_dim_sizes(1:size(dims, 2))=product(dims, dim=1)
  end subroutine concatenate_dimension_sizes


  ! Determines the extents of the block matching each partner process: along the new pencil dimension the
  ! block has that partner's size, in the other two dimensions it has my pencil size.
  !   new_pencil_dim            dimension along which the sizes differ per partner
  !   existing_pencil_dim       dimension whose process count gives the number of partners
  !   proc_sizes                size of each partner along new_pencil_dim
  !   my_pencil_size            my local pencil size
  !   pencil_processes_per_dim  number of processes per dimension
  !   specific_sizes_per_dim    receives the extents, indexed (dimension, partner)
  subroutine determine_matching_process_dimensions(new_pencil_dim, existing_pencil_dim, proc_sizes, &
       my_pencil_size, pencil_processes_per_dim, specific_sizes_per_dim)
    integer, intent(in) :: new_pencil_dim, existing_pencil_dim, proc_sizes(:), my_pencil_size(:), pencil_processes_per_dim(:)
    integer, dimension(:,:), intent(inout) :: specific_sizes_per_dim

    integer :: partner

    do partner=1,pencil_processes_per_dim(existing_pencil_dim)
      ! Begin with my own pencil size and then replace the entry that is specific to this partner
      specific_sizes_per_dim(1:3, partner)=my_pencil_size(1:3)
      if (new_pencil_dim .ge. 1 .and. new_pencil_dim .le. 3) then
        specific_sizes_per_dim(new_pencil_dim, partner)=proc_sizes(partner)
      end if
    end do
  end subroutine determine_matching_process_dimensions


  ! Creates the description of the layout the model data starts in: a Z pencil whose process layout, process
  ! location and local size are taken directly from the model state. No send or receive information is set.
  !   current_state  the current model state
  function create_initial_transposition_description(current_state) result(z_pencil_description)
    type(model_state_type), intent(inout) :: current_state
    type(pencil_transposition) :: z_pencil_description

    z_pencil_description%my_pencil_size=current_state%local_grid%size
    z_pencil_description%my_process_location=current_state%parallel%my_coords
    z_pencil_description%process_decomposition_layout=current_state%parallel%dim_sizes
    z_pencil_description%dim=z_index
  end function create_initial_transposition_description


  ! Determines my local size in each dimension for a new pencil layout. The new pencil dimension is held in
  ! full, the dimension of the existing pencil is split between the processes along it, and the remaining
  ! dimension keeps its existing local size. Extended dimensions use (n/2+1)*2 in place of the global size n.
  !   new_pencil_dim          dimension of the new pencil
  !   pencil_process_layout   number of processes per dimension in the new layout
  !   my_pencil_location      my location in the new layout
  !   existing_transposition  the layout the data is currently held in
  !   global_grid             the global grid
  !   extended_dimensions     dimensions whose size is extended to hold complex values as pairs of reals
  function determine_pencil_size(new_pencil_dim, pencil_process_layout, my_pencil_location, existing_transposition,&
       global_grid, extended_dimensions) result(pencil_size)

    type(pencil_transposition), intent(in) :: existing_transposition
    integer, intent(in) :: new_pencil_dim, pencil_process_layout(3), my_pencil_location(3), extended_dimensions(:)
    type(global_grid_type), intent(inout) :: global_grid
    integer :: pencil_size(3)

    integer :: axis, size_to_split, base_share, surplus

    do axis=1,3
      if (axis == new_pencil_dim) then
        ! The whole of this dimension is local
        if (is_extended_dimension(axis, extended_dimensions)) then
          pencil_size(axis)=(global_grid%size(new_pencil_dim)/2+1)*2
        else
          pencil_size(axis)=global_grid%size(new_pencil_dim)
        end if
      else if (axis == existing_transposition%dim) then
        ! Split between the processes, the first "surplus" locations hold one extra element
        size_to_split=global_grid%size(axis)
        if (is_extended_dimension(axis, extended_dimensions)) size_to_split=(size_to_split/2+1)*2
        base_share=size_to_split/pencil_process_layout(axis)
        surplus=size_to_split - base_share * pencil_process_layout(axis)
        if (my_pencil_location(axis) .lt. surplus) then
          pencil_size(axis)=base_share+1
        else
          pencil_size(axis)=base_share
        end if
      else
        pencil_size(axis)=existing_transposition%my_pencil_size(axis)
      end if
    end do
  end function determine_pencil_size


  ! Tests whether a dimension appears in the list of extended dimensions.
  !   dimension            the dimension to look for
  !   extended_dimensions  the list to search, which may be empty or hold values that match no dimension
  ! Returns true if the dimension is in the list and false otherwise.
  logical function is_extended_dimension(dimension, extended_dimensions)
    integer, intent(in) :: dimension, extended_dimensions(:)

    is_extended_dimension=any(extended_dimensions == dimension)
  end function is_extended_dimension


  ! Converts the per process sizes of a dimension into the per process sizes of the extended dimension. The
  ! extended total, (n/2+1)*2 where n is the sum of the sizes provided, is shared out evenly with the first
  ! processes each receiving one extra element until the remainder is used up.
  !   process_dim_sizes  the size held by each process along the dimension
  function normal_to_extended_process_dim_sizes(process_dim_sizes) result(extended_sizes)
    integer, dimension(:), intent(in) :: process_dim_sizes
    integer, dimension(size(process_dim_sizes)) :: extended_sizes

    integer :: process_count, extended_total, base_share, surplus, process_idx

    process_count=size(process_dim_sizes)
    extended_total=(sum(process_dim_sizes)/2+1)*2
    base_share=extended_total/process_count
    surplus=extended_total-base_share*process_count

    do process_idx=1, process_count
      if (process_idx .le. surplus) then
        extended_sizes(process_idx)=base_share+1
      else
        extended_sizes(process_idx)=base_share
      end if
    end do
  end function normal_to_extended_process_dim_sizes


  ! Unpacks complex values into a real array: complex entry p along the first dimension is written as its real
  ! part at index 2p-1 and its imaginary part at index 2p. The extent of the loops is taken from real_data.
  !   complex_data  the complex values to unpack
  !   real_data     receives the (real, imaginary) pairs
  subroutine convert_complex_to_real(complex_data, real_data)
    complex(C_DOUBLE_COMPLEX), dimension(:,:,:), intent(in) :: complex_data
    real(kind=default_precision), dimension(:,:,:), intent(out) :: real_data

    integer :: plane, row, pair, number_pairs

    number_pairs=(size(real_data,1)+1)/2
    do plane=1,size(real_data,3)
      do row=1,size(real_data,2)
        do pair=1,number_pairs
          real_data(2*pair-1,row,plane)=real(real(complex_data(pair,row,plane)), kind=default_precision)
          real_data(2*pair,row,plane)=real(aimag(complex_data(pair,row,plane)), kind=default_precision)
        end do
      end do
    end do
  end subroutine convert_complex_to_real


  ! Packs pairs of reals into complex values: the reals at indices k and k+1 (k odd) along the first dimension
  ! become the real and imaginary parts of complex entry (k+1)/2. The complex array is zeroed first, so any
  ! entry not covered by real_data is left as zero.
  !   real_data     the (real, imaginary) pairs
  !   complex_data  receives the complex values, must already be associated
  subroutine convert_real_to_complex(real_data, complex_data)
    real(kind=default_precision), dimension(:,:,:), intent(in) :: real_data
    ! intent(inout) rather than intent(out): the association status of an intent(out) pointer dummy is undefined
    ! on entry, which would make assigning to the caller's already allocated target non-conforming
    complex(C_DOUBLE_COMPLEX), dimension(:,:,:), contiguous, pointer, intent(inout) :: complex_data

    integer :: i, j, k

    complex_data=cmplx(0.0d0, 0.0d0, kind=c_double_complex)

    do i=1,size(real_data,3)
      do j=1,size(real_data,2)
        do k=1,size(real_data,1),2
          complex_data((k+1)/2,j,i)=cmplx(real_data(k,j,i), real_data(k+1,j,i), kind=c_double_complex)
        end do
      end do
    end do
  end subroutine convert_real_to_complex


  ! Deduces the global index (starting at one) at which my share of the extended dimension begins. The extended
  ! size, (n/2+1)*2 for a global size of n, is shared out evenly between the processes of the dimension with
  ! the first "surplus" processes each holding one extra element.
  !   current_state  the current model state (global grid size, process counts and my coordinates)
  !   dimension      the dimension to deduce the start for
  integer function deduce_my_global_start(current_state, dimension)
    type(model_state_type), intent(inout) :: current_state
    integer, intent(in) :: dimension

    integer :: extended_size, base_share, surplus, processes_with_extra, processes_without_extra

    extended_size=(current_state%global_grid%size(dimension)/2+1)*2
    base_share=extended_size / current_state%parallel%dim_sizes(dimension)
    surplus=extended_size - base_share * current_state%parallel%dim_sizes(dimension)

    ! Count the processes before me that hold the larger share and those that hold the smaller share
    processes_with_extra=min(surplus, current_state%parallel%my_coords(dimension))
    processes_without_extra=max(current_state%parallel%my_coords(dimension)-surplus, 0)

    deduce_my_global_start=((base_share+1)*processes_with_extra + base_share*processes_without_extra) + 1
  end function deduce_my_global_start

end module pencil_fft_mod