module SparseClass use iso_c_binding use ArrayClass use MathClass use RandomClass use RangeClass implicit none !> Please edit "src/SparseClass/SparseClass.c" !> for !> 'c_dot_product' !> 'crs_spmv_real32' !> 'crs_spmv_real64' interface Subroutine c_dot_product(a, b, n, ret) bind(C, Name='c_dot_product') import integer(C_size_t), value :: n real(c_double), intent(in) :: a(n), b(n) real(c_double), intent(out) :: ret(1) End Subroutine End Interface interface Subroutine crs_spmv_real32(val, row_ptr, col_idx, old_vector, new_vector, n, col_size) & Bind(C, Name="crs_spmv_real32") import Integer(C_size_t), Value :: n, col_size real(C_float), Intent(In) :: val(n) integer(C_int64_t), Intent(In) :: row_ptr(n + 1) integer(C_int), Intent(In) :: col_idx(col_size) real(C_float), Intent(In) :: old_vector(n) real(C_float), Intent(InOut) :: new_vector(n) End Subroutine End interface interface Subroutine crs_spmv_real64(val, row_ptr, col_idx, old_vector, new_vector, n, col_size) & Bind(C, Name="crs_spmv_real64") use iso_c_binding import Integer(C_size_t), Value :: n, col_size real(C_double), Intent(In) :: val(n) integer(C_int64_t), Intent(In) :: row_ptr(n + 1) integer(C_int), Intent(In) :: col_idx(col_size) real(C_double), Intent(In) :: old_vector(n) real(C_double), Intent(InOut) :: new_vector(n) End Subroutine End interface ! interface ! Subroutine crs_spmv_real64_opencl(val,row_ptr,col_idx,old_vector,new_vector,n,col_size) & ! Bind(C,Name="crs_spmv_real64_opencl") ! use iso_c_binding ! import ! Integer(C_size_t),Value :: n,col_size ! real(C_double),Intent(In) :: val(n) ! integer(C_int64_t),Intent(In) :: row_ptr(n+1) ! integer(C_int),Intent(In) :: col_idx(col_size) ! real(C_double),Intent(In) :: old_vector(n) ! real(C_double),Intent(InOut) :: new_vector(n) ! ! End Subroutine !End interface interface Subroutine c_sparse_matvec(row_ptr, col_idx, val, x, n, n_col, ret) bind(C, Name='c_sparse_matvec') import integer(C_size_t), value :: n, n_col integer(c_int), intent(in) :: row_ptr(n), col_idx(n_col) real(c_double), intent(in) :: val(n - 1), x(n - 1) real(c_double), intent(out) :: ret(n - 1) End Subroutine End Interface interface sinc module procedure sinc_complex64, sinc_real64, sinc_real64_vec end interface type :: COO_Row_ real(real64), allocatable :: val(:) complex(real64), allocatable :: val_complex64(:) integer(int32), allocatable :: col(:) end type type :: COO_ type(COO_Row_), allocatable :: row(:) contains procedure, public :: init => initCOO procedure, public :: update => updateCOO procedure, public :: set => updateCOO procedure, pass :: addCOO procedure, pass :: add_complexCOO generic :: add => addCOO, add_complexCOO procedure, public :: getDenseMatrix => getDenseMatrixCOO procedure, public :: to_dense => getDenseMatrixCOO procedure, public :: remove => removeCOO procedure, public :: getAllCol => getAllColCOO procedure, public :: DOF => DOFCOO procedure, public :: to_CRS => to_CRSCOO procedure, public :: get => getCOO procedure, public :: ne => neCOO procedure, public :: maxval => maxvalCOO procedure, public :: random => randomCOO ! create random sparse matrix ! typical matrix procedure, public :: eyes => eyesCOO procedure, public :: poisson => poissonCOO !procedure,public ::getAllCol_as_row_obj => getAllCol_as_row_objCOO end type type :: CCS_ integer(int32), allocatable :: col_ptr(:) integer(int32), allocatable :: row_idx(:) real(real64), allocatable :: val(:) contains procedure, public :: get_column => get_column_CCS end type type :: CRS_ integer(int32), allocatable :: col_idx(:) ! Destructive integer(int64), allocatable :: row_ptr(:) real(real64), allocatable :: val(:) real(real32), allocatable :: val_real32(:) complex(real64), allocatable :: val_complex64(:) integer(int32) :: dtype = real64 contains procedure, public :: init => initCRS procedure, public :: eyes => eyesCRS procedure, public :: poisson => poissonCRS procedure, public :: Lanczos => LanczosCRS procedure, pass :: matmulCRS !procedure,pass :: matmul_opencl_CRS procedure, pass :: matmul_c_CRS procedure, pass :: matmul_c_real32_CRS procedure, pass :: matmul_real32_CRS procedure, pass :: matmul_complex_CRS generic :: matmul => matmulCRS, matmul_complex_CRS, matmul_real32_CRS, matmul_c_CRS, & matmul_c_real32_CRS!,matmul_opencl_CRS procedure, public :: to_real32 => to_real32_CRS procedure, public :: SpMV => spmv_as_subroutine_CRS procedure, public :: eig => eigCRS procedure, public :: to_dense => to_denseCRS procedure, public :: DOF => DOFCRS procedure, public :: remove => removeCRS procedure, public :: size => sizeCRS procedure, public :: shape => shapeCRS procedure, public :: update => updateCRS procedure, public :: add => addCRS ! 対象行列とみなし,埋まっていない要素をすべて埋める. procedure, public :: fillSymmetric => fillSymmetric_CRS procedure, public :: get => getCRS procedure, public :: is_nonzero => is_nonzeroCRS procedure, public :: diag => diagCRS procedure, public :: lumped => lumpedCRS procedure, public :: divide_by => divide_by_CRS ! subroutine version procedure, public :: divide_by_vector => divide_by_vector_CRS procedure, public :: mult_by => mult_by_CRS procedure, public :: to_CCS => to_CCSCRS procedure, public :: load => loadCRS procedure, public :: ILU => ILUCRS procedure, public :: ILU_matvec => ILU_matvecCRS procedure, public :: BICGSTAB => BICGSTAB_CRSSparse procedure, pass :: tensor_exponential_crs procedure, pass :: tensor_sqrt_crs procedure, pass :: tensor_exp_sqrt_crs procedure, pass :: tensor_log_crs procedure, public :: tensor_log_crs_modified => tensor_log_crs_modified_Sparse procedure, pass :: fixCRS procedure, pass :: fix_zeroCRS procedure, pass :: tensor_exponential_complex64_crs procedure, pass :: tensor_exp_sqrt_complex64_crs procedure, pass :: tensor_cos_sqrt_complex64_crs procedure, pass :: tensor_cos_sqrt_real64_crs procedure, pass :: tensor_cos_sqrt_LPF_real64_crs procedure, pass :: tensor_sinc_sqrt_complex64_crs procedure, pass :: tensor_sinc_sqrt_real64_crs procedure, pass :: tensor_t_sinc_sqrt_LPF_real64_crs procedure, pass :: tensor_sqrt_complex64_crs procedure, pass :: tensor_log_complex64_crs procedure, pass :: fix_complex64_CRS procedure, pass :: tensor_d1_wave_kernel_complex64_crs procedure, pass :: tensor_wave_kernel_complex_64_crs procedure, pass :: tensor_wave_kernel_real_64_crs procedure, pass :: tensor_wave_kernel_LPF_real_64_crs procedure, pass :: tensor_wave_kernel_RHS_real_64_crs procedure, pass :: tensor_wave_kernel_RHS_LPF_real_64_crs procedure, pass :: tensor_wave_kernel_RHS_complex_64_crs procedure, pass :: tensor_cos_sqrt_cos_sqrt_complex64_crs procedure, pass :: tensor_cos_sqrt_sinc_sqrt_complex64_crs procedure, pass :: tensor_sinc_sqrt_cos_sqrt_complex64_crs procedure, pass :: tensor_sinc_sqrt_sinc_sqrt_complex64_crs generic, public :: tensor_exponential => tensor_exponential_complex64_crs, tensor_exponential_crs generic, public :: tensor_exp => tensor_exponential_complex64_crs, tensor_exponential_crs generic, public :: tensor_exp_sqrt => tensor_exp_sqrt_complex64_crs, tensor_exp_sqrt_crs generic, public :: tensor_cos_sqrt => tensor_cos_sqrt_complex64_crs, & tensor_cos_sqrt_real64_crs generic, public :: tensor_sinc_sqrt => tensor_sinc_sqrt_complex64_crs, & tensor_sinc_sqrt_real64_crs generic, public :: tensor_cos_sqrt_LPF => tensor_cos_sqrt_LPF_real64_crs generic, public :: tensor_t_sinc_sqrt_LPF => tensor_t_sinc_sqrt_LPF_real64_crs generic, public :: tensor_sqrt => tensor_sqrt_complex64_crs, tensor_sqrt_crs generic, public :: tensor_log => tensor_log_complex64_crs, tensor_log_crs !>>> not verified. generic, public :: tensor_cos_sqrt_cos_sqrt => tensor_cos_sqrt_cos_sqrt_complex64_crs generic, public :: tensor_cos_sqrt_sinc_sqrt => tensor_cos_sqrt_sinc_sqrt_complex64_crs generic, public :: tensor_sinc_sqrt_cos_sqrt => tensor_sinc_sqrt_cos_sqrt_complex64_crs generic, public :: tensor_sinc_sqrt_sinc_sqrt => tensor_sinc_sqrt_sinc_sqrt_complex64_crs !<<< not verified. generic, public :: tensor_wave_kernel => tensor_wave_kernel_complex_64_crs, & tensor_wave_kernel_real_64_crs generic, public :: tensor_wave_kernel_LPF => tensor_wave_kernel_LPF_real_64_crs generic, public :: tensor_wave_kernel_RHS => tensor_wave_kernel_RHS_real_64_crs, & tensor_wave_kernel_RHS_complex_64_crs, tensor_wave_kernel_RHS_LPF_real_64_crs generic, public :: tensor_d1_wave_kernel => tensor_d1_wave_kernel_complex64_crs generic, public :: fix => fix_complex64_CRS, fixCRS, fix_zeroCRS ! assemble large matrix by combining small matrices ! >> If you want to do this, use type(BCRS_), which is for block CRS format. !procedure,public :: assemble => assemble_CRS <- this is/will not be implemented. end type type :: BCRS_ type(CRS_),allocatable :: CRS(:,:) contains procedure,public :: set => setBCRS procedure,public :: add => addBCRS procedure,public :: showShape => showShapeBCRS procedure,public :: shape => shapeBCRS procedure,public :: row_range => row_range_BCRS procedure,public :: col_range => col_range_BCRS procedure,public :: matmul => matmulBCRS procedure,public :: to_dense => to_dense_BCRS procedure,public :: exp => expBCRS end type public :: operator(+) public :: operator(-) public :: operator(*) !public :: operator(/) interface operator(+) module procedure addCRS_and_CRS end interface interface operator(-) module procedure diffCRS_and_CRS end interface interface LAPACK_EIG module procedure LAPACK_EIG_DENSE, LAPACK_EIG_SPARSE end interface interface operator(*) module procedure multReal64_and_CRS, multCRS_and_Real64, multReal64_and_BCRS, multBCRS_and_Real64 end interface interface matmul module procedure :: matmul_CRS_CRS, matmul_CRS_vec end interface interface LOBPCG module procedure LOBPCG_CRS, LOBPCG_SINGLE_CRS end interface LOBPCG interface to_diag module procedure to_diag_vector_to_CRS end interface interface allocated module procedure allocated_CRS end interface interface speye module procedure speyeCOO end interface interface imag_I module procedure imag_ICOO end interface ! interface operator(/) ! module procedure divide_by_diagonal_vector_CRS, divide_by_scalar_CRS ! end interface interface to_COO module procedure to_COO_from_DenseMatrix, to_COO_from_ArrayObject end interface interface to_CRS module procedure to_CRS_from_DenseMatrix, to_CRS_from_ArrayObject end interface type :: Sparse_ type(COO_) :: COO type(CRS_) :: CRS type(CCS_) :: CCS end type contains subroutine initCOO(this, num_row) class(COO_), intent(inout) :: this integer(int32), intent(in) :: num_row allocate (this%row(num_row)) end subroutine function to_CRSCOO(this, remove_coo) result(CRS_version) class(COO_), intent(inout) :: this logical, optional, intent(in) :: remove_coo type(CRS_) :: CRS_version integer(int32) :: i, idx integer(int64) :: n, j CRS_version%col_idx = this%getAllCol() CRS_version%row_ptr = int(zeros(size(this%row) + 1)) CRS_version%row_ptr(1) = 1 do i = 2, size(this%row) + 1 if (.not. allocated(this%row(i - 1)%col)) then CRS_version%row_ptr(i) = CRS_version%row_ptr(i - 1) cycle end if CRS_version%row_ptr(i) = CRS_version%row_ptr(i - 1) + size(this%row(i - 1)%col) if (present(remove_coo)) then if (remove_coo) then deallocate (this%row(i - 1)%col) end if end if end do !CRS_version%val n = size(CRS_version%col_idx) allocate (CRS_version%val(n)) CRS_version%val(:) = 0.0d0 do i = 1, size(this%row) if (.not. allocated(this%row(i)%val)) then cycle end if idx = 0 do j = CRS_version%row_ptr(i), CRS_version%row_ptr(i + 1) - 1 idx = idx + 1 CRS_version%val(j) = this%row(i)%val(idx) end do if (present(remove_coo)) then if (remove_coo) then deallocate (this%row(i)%val) end if end if end do call this%remove() end function subroutine updateCOO(this, row, col, val) class(COO_), intent(inout) :: this integer(int32), intent(in) :: row integer(int32), intent(in) :: col real(real64), intent(in) :: val integer(int32) :: i, col_id if (row > this%DOF() .or. row < 1) return if (col > this%DOF() .or. col < 1) return if (.not. allocated(this%row)) then print *, "ERROR :: initCOO" print *, "Please call [COO]%init() before this operation." return end if if (.not. allocated(this%row(row)%val)) then this%row(row)%val = [val] this%row(row)%col = [col] else ! check duplication if (minval(abs(this%row(row)%col(:) - col)) == 0) then ! duplication do i = 1, size(this%row(row)%col) if (this%row(row)%col(i) == col) then col_id = i exit end if end do this%row(row)%val(col_id) = val else this%row(row)%val = this%row(row)%val//[val] this%row(row)%col = this%row(row)%col//[col] end if end if end subroutine subroutine addCOO(this, row, col, val) class(COO_), intent(inout) :: this type(COO_) :: buf integer(int32), intent(in) :: row integer(int32), intent(in) :: col real(real64), intent(in) :: val integer(int32) :: i, col_id,n !if (row > this%DOF() .or. row < 1) return !if (col > this%DOF() .or. col < 1) return if ( row < 1) return if (col < 1) return if(row > this%DOF())then ! add row buf = this n = this%DOF() call this%remove() call this%init(row) this%row(1:n) = buf%row(1:n) endif if (.not. allocated(this%row)) then print *, "ERROR :: initCOO" print *, "Please call [COO]%init() before this operation." return end if if (.not. allocated(this%row(row)%val)) then this%row(row)%val = [val] this%row(row)%col = [col] else ! check duplication if (minval(abs(this%row(row)%col(:) - col)) == 0) then ! duplication do i = 1, size(this%row(row)%col) if (this%row(row)%col(i) == col) then col_id = i exit end if end do this%row(row)%val(col_id) = this%row(row)%val(col_id) + val else this%row(row)%val = this%row(row)%val//[val] this%row(row)%col = this%row(row)%col//[col] end if end if end subroutine ! ############################################### ! ############################################### subroutine add_complexCOO(this, row, col, val) class(COO_), intent(inout) :: this integer(int32), intent(in) :: row integer(int32), intent(in) :: col complex(real64), intent(in) :: val integer(int32) :: i, col_id if (row > this%DOF() .or. row < 1) return if (col > this%DOF() .or. col < 1) return if (.not. allocated(this%row)) then print *, "ERROR :: initCOO" print *, "Please call [COO]%init() before this operation." return end if if (.not. allocated(this%row(row)%val_complex64)) then this%row(row)%val_complex64 = [val] this%row(row)%col = [col] else ! check duplication if (minval(abs(this%row(row)%col(:) - col)) == 0) then ! duplication do i = 1, size(this%row(row)%col) if (this%row(row)%col(i) == col) then col_id = i exit end if end do this%row(row)%val_complex64(col_id) = this%row(row)%val_complex64(col_id) + val else this%row(row)%val_complex64 = this%row(row)%val_complex64//[val] this%row(row)%col = this%row(row)%col//[col] end if end if end subroutine ! ############################################### ! ############################################### function getDenseMatrixCOO(this) result(dense_matrix) class(COO_), intent(in) :: this real(real64), allocatable :: dense_matrix(:, :) integer(int32) :: i, j dense_matrix = zeros(size(this%row), size(this%row)) do i = 1, size(this%row) print *, i if (allocated(this%row(i)%col)) then do j = 1, size(this%row(i)%col) dense_matrix(i, this%row(i)%col(j)) = this%row(i)%val(j) end do end if end do end function ! ############################################### ! ############################################### subroutine removeCOO(this) class(COO_), intent(inout) :: this if (allocated(this%row)) deallocate (this%row) end subroutine ! ############################################### function getAllColCOO(this) result(cols) class(COO_), intent(in) :: this type(COO_ROW_) :: row integer(int32), allocatable :: cols(:) integer(int32) :: num_total_cols, i num_total_cols = 0 do i = 1, size(this%row) if (allocated(this%row(i)%col)) then num_total_cols = num_total_cols + size(this%row(i)%col) else cycle end if end do allocate (cols(num_total_cols)) cols(:) = 0 num_total_cols = 0 do i = 1, size(this%row) if (allocated(this%row(i)%col)) then cols(num_total_cols + 1:num_total_cols + size(this%row(i)%col)) & = this%row(i)%col(1:size(this%row(i)%col)) num_total_cols = num_total_cols + size(this%row(i)%col) else cycle end if end do end function !recursive function getAllCol_as_row_objCOO(this,row_old) result(row_new) ! class(COO_),intent(in) :: this ! type(COO_Row_),optional,intent(in) :: row_old(:) ! type(COO_Row_) :: row_new ! type(COO_Row_),allocatable :: row_new_buf(:) ! ! integer(int32) :: num_row,half_row,num_elem ! ! print *, "hello" ! if(.not.present(row_old) )then ! print *, "hello" ! row_new = this%getAllCol_as_row_obj(this%row) ! ! else ! if(size(row_old) ==1 )then ! if(allocated(row_old(1)%col) )then ! row_new%col = row_old(1)%col ! else ! allocate(row_new%col(0) ) ! endif ! elseif(size(row_old) ==2 )then ! if(allocated(row_old(1)%col) .and. allocated(row_old(1)%col) )then ! row_new%col = row_old(1)%col ! row_new%col = row_new%col // row_old(2)%col ! elseif(allocated(row_old(1)%col))then ! row_new%col = row_old(1)%col ! elseif(allocated(row_old(2)%col))then ! row_new%col = row_old(2)%Col ! else ! allocate(row_new%col(0) ) ! endif ! else ! num_row = size(row_old) ! half_row = num_row/2 ! print *, half_row, num_row ! allocate(row_new_buf(2) ) ! row_new_buf(1) = this%getAllCol_as_row_obj(this%row(1:half_row) ) ! row_new_buf(2) = this%getAllCol_as_row_obj(this%row(half_row:) ) ! row_new%col = row_new_buf(1)%col // row_new_buf(2)%col ! endif ! endif ! !end function ! subroutine LanczosCRS(this, DiagonalVector, subDiagonalVector, V) class(CRS_), intent(in) :: this real(real64), allocatable, intent(inout) :: V(:, :)!,T(:,:) real(real64), allocatable, intent(inout) :: DiagonalVector(:) real(real64), allocatable, intent(inout) :: subDiagonalVector(:) real(real64), allocatable :: v1(:), w1(:), vj(:), wj(:) real(real64) :: alp1, beta_j, alp_j type(Random_) :: random integer(int32) :: i, N, j, m ! https://en.wikipedia.org/wiki/Lanczos_algorithm !N = size(A,1) N = size(this%row_ptr) - 1 V = zeros(N, N) !T = zeros(N,N) DiagonalVector = zeros(N) subDiagonalVector = zeros(N - 1) v1 = 2.0d0*random%randn(N) - 1.0d0 v1 = v1/norm(v1) w1 = this%matmul(v1) alp1 = dot_product(w1, v1) w1 = w1 - alp1*v1 beta_j = norm(w1) !T(1,1) = alp1 V(:, 1) = v1 DiagonalVector(1) = alp1 do i = 2, N beta_j = norm(w1) if (beta_j /= 0.0d0) then vj = w1/beta_j else vj = 2.0d0*random%randn(N) - 1.0d0 stop end if wj = this%matmul(vj) alp_j = dot_product(wj, vj) wj = wj - alp_j*vj - beta_j*v1 v1 = vj w1 = wj !T(i,i) = alp_j !T(i,i-1) = beta_j !T(i-1,i) = beta_j V(:, i) = vj DiagonalVector(i) = alp_j subDiagonalVector(i - 1) = beta_j end do end subroutine function matmulCRS(CRS, old_vector, run_mode, cache_size) result(new_vector) class(CRS_), intent(in) :: CRS real(real64), intent(in) :: Old_vector(:) real(real64), allocatable :: new_vector(:) integer(int32), optional, intent(in) :: run_mode integer(int32) :: mode_id integer(int32), optional, intent(in) :: cache_size mode_id = input(default=0, option=run_mode) if (mode_id == 1) then ! elemental mode new_vector = crs_matvec_generic_elemental_SparseClass( & CRS_value=CRS%val, & CRS_col=CRS%col_idx, & CRS_row_ptr=CRS%row_ptr, & old_vector=old_vector) else new_vector = crs_matvec_generic_SparseClass( & CRS_value=CRS%val, & CRS_col=CRS%col_idx, & CRS_row_ptr=CRS%row_ptr, & old_vector=old_vector, & cache_size=cache_size) end if end function function matmul_c_CRS(CRS, old_vector, c_routine) result(new_vector) class(CRS_), intent(in) :: CRS real(real64), intent(in) :: Old_vector(:) real(real64), allocatable :: new_vector(:) character(*), intent(in) :: c_routine integer(int64) :: n, col_size n = crs%size() col_size = size(crs%col_idx) allocate (new_vector(crs%size())) new_vector(:) = 0.0d0 call crs_spmv_real64(val=crs%val, row_ptr=crs%row_ptr, & col_idx=crs%col_idx, & old_vector=old_vector, new_vector=new_vector, n=n, col_size=col_size) end function ! function matmul_opencl_CRS(CRS,old_vector,opencl) result(new_vector) ! ! class(CRS_),intent(in) :: CRS ! real(real64),intent(in) :: Old_vector(:) ! real(real64),allocatable :: new_vector(:) ! logical,intent(in) :: opencl ! integer(int64) :: n,col_size ! ! n = crs%size() ! col_size=size(crs%col_idx) ! allocate(new_vector(crs%size() ) ) ! new_vector(:) = 0.0d0 ! call crs_spmv_real64_opencl(val=crs%val,row_ptr=crs%row_ptr,& ! col_idx=crs%col_idx,& ! old_vector=old_vector,new_vector=new_vector,n=n,col_size=col_size) ! ! end function function matmul_c_real32_CRS(CRS, old_vector, c_routine) result(new_vector) class(CRS_), intent(in) :: CRS real(real32), intent(in) :: Old_vector(:) real(real32), allocatable :: new_vector(:) character(*), intent(in) :: c_routine integer(int64) :: n, col_size n = crs%size() col_size = size(crs%col_idx) allocate (new_vector(crs%size())) new_vector(:) = 0.0d0 call crs_spmv_real32(val=real(crs%val), row_ptr=crs%row_ptr, & col_idx=crs%col_idx, & old_vector=old_vector, new_vector=new_vector, n=n, col_size=col_size) end function function matmul_real32_CRS(CRS, old_vector) result(new_vector) class(CRS_), intent(in) :: CRS real(real32), intent(in) :: Old_vector(:) real(real32), allocatable :: new_vector(:) if (CRS%dtype /= real32) then print *, "ERROR :: matmul_real32_CRS >> vector type is real32, but & & CRS%dtype /= real32. Plase use this%to_real32() before calling this." return end if new_vector = crs_matvec_generic_real32_SparseClass( & CRS_value=CRS%val_real32, & CRS_col=CRS%col_idx, & CRS_row_ptr=CRS%row_ptr, & old_vector=old_vector) end function function matmul_complex_CRS(CRS, old_vector) result(new_vector) class(CRS_), intent(in) :: CRS complex(real64), intent(in) :: Old_vector(:) complex(real64), allocatable :: new_vector(:) integer(int32) :: n, row integer(int64) :: CRS_id n = size(CRS%row_ptr) - 1 !if (size(old_vector) /= n) then ! print *, "ERROR crs_matvec :: inconsistent size for old_vector" ! return !end if new_vector = zeros(n) ! accerelation !$OMP parallel default(shared) private(CRS_id,row) !$OMP do reduction(+:new_vector) do row = 1, n do CRS_id = CRS%row_ptr(row), CRS%row_ptr(row + 1) - 1 new_vector(row) = new_vector(row) + CRS%val(CRS_id)*old_vector(CRS%col_idx(CRS_id)) end do end do !$OMP end do !$OMP end parallel ! new_vector = crs_matvec_generic_complex_SparseClass(& ! CRS_value= dcmplx(CRS%val),& ! CRS_col=CRS%col_idx,& ! CRS_row_ptr=CRS%row_ptr,& ! old_vector=old_vector) end function function crs_matvec_generic_SparseClass(CRS_value, CRS_col, CRS_row_ptr, old_vector, & cache_size) result(new_vector) real(real64), intent(in) :: CRS_value(:), Old_vector(:) integeR(int32), intent(in):: CRS_col(:) integeR(int64), intent(in):: CRS_row_ptr(:) integer(int32), optional, intent(in) :: cache_size real(real64), allocatable :: new_vector(:) integer(int32) :: i, j, n, gid, lid, row, col integer(int64) :: CRS_id integer(int64) :: block_idx, num_block, mem_size, num_row_in_block integer(int64), allocatable :: row_idx_from(:), row_idx_to(:) !> x_i = A_ij b_j n = size(CRS_row_ptr) - 1 !if (size(old_vector) /= n) then ! print *, "ERROR crs_matvec :: inconsistent size for old_vector" ! return !end if new_vector = zeros(n) ! accerelation ! do concurrent (row=1:n) ! new_vector(row) = new_vector(row) + dot_product( & ! CRS_value(CRS_row_ptr(row):CRS_row_ptr(row+1)-1), & ! old_vector(CRS_col(CRS_row_ptr(row):CRS_row_ptr(row+1)-1) )& ! ) ! enddo !v2.0 !v2.0 ! !if(.not.present(cache_size) )then !$OMP parallel default(shared) !$OMP do reduction(+:new_vector) do row = 1, n new_vector(row) = new_vector(row) + dot_product( & CRS_value(CRS_row_ptr(row):CRS_row_ptr(row + 1) - 1), & old_vector(CRS_col(CRS_row_ptr(row):CRS_row_ptr(row + 1) - 1)) & ) end do !$OMP end do !$OMP end parallel ! else ! ! This does not work ! ! ! (1) 行列およびベクトルのメモリ消費量を算出する. ! mem_size = sizeof(CRS_value) + sizeof(CRS_col)+sizeof(CRS_row_ptr)& ! +sizeof(new_vector)+sizeof(old_vector) ! ! (2) メモリ消費量/キャッシュサイズ=ブロック数を算出する. ! ! ! (3) 1~nを,1~k, k+1~2*k, 2k+1~3*k,...とブロックに分割し, ! ! j番目の開始idxをrow_idx_from(j), 終了idxをrow_idx_to(j)へ格納 ! if(mem_size <= cache_size)then ! num_block = 1 ! row_idx_from = [1] ! row_idx_to = [n] ! ! else ! num_block = mem_size/cache_size ! if(mod(n,num_block)==0 )then ! allocate(row_idx_from(num_block) ) ! allocate(row_idx_to(num_block) ) ! num_row_in_block = n/num_block ! do i=1,num_block ! row_idx_from(i) = (i-1)*num_row_in_block+1 ! row_idx_to(i) = i*num_row_in_block ! enddo ! else ! allocate(row_idx_from(num_block+1) ) ! allocate(row_idx_to(num_block+1) ) ! num_row_in_block = n/num_block ! do i=1,num_block ! row_idx_from(i) = (i-1)*num_row_in_block+1 ! row_idx_to(i) = i*num_row_in_block ! enddo ! row_idx_from(num_block+1) = num_block*num_row_in_block+1 ! row_idx_to(num_block+1) = n ! endif ! endif ! ! do block_idx = 1,num_block ! !$OMP parallel default(shared) ! !$OMP do reduction(+:new_vector) ! do row = row_idx_from(block_idx), row_idx_to(block_idx) ! new_vector(row) = new_vector(row) + dot_product( & ! CRS_value(CRS_row_ptr(row):CRS_row_ptr(row+1)-1), & ! old_vector(CRS_col(CRS_row_ptr(row):CRS_row_ptr(row+1)-1) )& ! ) ! enddo ! !$OMP end do ! !$OMP end parallel ! enddo ! endif !<v1.0> ! !$OMP parallel default(shared) private(CRS_id,col) ! !$OMP do reduction(+:new_vector) ! do row = 1, n ! do concurrent(CRS_id=CRS_row_ptr(row):CRS_row_ptr(row+1)-1) ! new_vector(row) = new_vector(row) + CRS_value(CRS_id)*old_vector(CRS_col(CRS_id)) ! enddo ! enddo ! !$OMP end do ! !$OMP end parallel ! end function ! ################################################################### function crs_matvec_generic_elemental_SparseClass(CRS_value, CRS_col, CRS_row_ptr, old_vector) result(new_vector) real(real64), intent(in) :: CRS_value(:), Old_vector(:) integeR(int32), intent(in):: CRS_col(:) integeR(int64), intent(in):: CRS_row_ptr(:) real(real64), allocatable :: new_vector(:) integer(int32) :: i, j, n, gid, lid, row, col integer(int64) :: CRS_id !> x_i = A_ij b_j n = size(CRS_row_ptr) - 1 !if (size(old_vector) /= n) then ! print *, "ERROR crs_matvec :: inconsistent size for old_vector" ! return !end if new_vector = zeros(n) ! accerelation ! do concurrent (row=1:n) ! new_vector(row) = new_vector(row) + dot_product( & ! CRS_value(CRS_row_ptr(row):CRS_row_ptr(row+1)-1), & ! old_vector(CRS_col(CRS_row_ptr(row):CRS_row_ptr(row+1)-1) )& ! ) ! enddo !v2.0 !v2.0 !$OMP parallel default(shared) !$OMP do reduction(+:new_vector) do row = 1, n new_vector(row) = new_vector(row) + dot_product( & CRS_value(CRS_row_ptr(row):CRS_row_ptr(row + 1) - 1), & old_vector(CRS_col(CRS_row_ptr(row):CRS_row_ptr(row + 1) - 1)) & ) end do !$OMP end do !$OMP end parallel !<v1.0> ! !$OMP parallel default(shared) private(CRS_id,col) ! !$OMP do reduction(+:new_vector) ! do row = 1, n ! do concurrent(CRS_id=CRS_row_ptr(row):CRS_row_ptr(row+1)-1) ! new_vector(row) = new_vector(row) + CRS_value(CRS_id)*old_vector(CRS_col(CRS_id)) ! enddo ! enddo ! !$OMP end do ! !$OMP end parallel ! end function ! ################################################################### subroutine spmv_as_subroutine_CRS(this, old_vector, new_vector) class(CRS_), intent(in) :: this real(real64), intent(in) :: old_vector(:) real(real64), intent(out) :: new_vector(:) integer(int32) :: row new_vector(:) = 0.0d0 !$OMP parallel default(shared) !$OMP do reduction(+:new_vector) do row = 1, size(new_vector) new_vector(row) = new_vector(row) + dot_product( & this%val(this%row_ptr(row):this%row_ptr(row + 1) - 1), & old_vector(this%col_idx(this%row_ptr(row):this%row_ptr(row + 1) - 1)) & ) end do !$OMP end do !$OMP end parallel end subroutine function crs_matvec_generic_real32_SparseClass(CRS_value, CRS_col, CRS_row_ptr, old_vector) result(new_vector) real(real32), intent(in) :: CRS_value(:), Old_vector(:) integeR(int32), intent(in):: CRS_col(:) integeR(int64), intent(in):: CRS_row_ptr(:) real(real32), allocatable :: new_vector(:) integer(int32) :: i, j, n, gid, lid, row, col integer(int64) :: CRS_id !> x_i = A_ij b_j n = size(CRS_row_ptr) - 1 !if (size(old_vector) /= n) then ! print *, "ERROR crs_matvec :: inconsistent size for old_vector" ! return !end if new_vector = zeros(n) ! accerelation ! do concurrent (row=1:n) ! new_vector(row) = new_vector(row) + dot_product( & ! CRS_value(CRS_row_ptr(row):CRS_row_ptr(row+1)-1), & ! old_vector(CRS_col(CRS_row_ptr(row):CRS_row_ptr(row+1)-1) )& ! ) ! enddo !v2.0 !$OMP parallel default(shared) !$OMP do reduction(+:new_vector) do row = 1, n new_vector(row) = new_vector(row) + dot_product( & CRS_value(CRS_row_ptr(row):CRS_row_ptr(row + 1) - 1), & old_vector(CRS_col(CRS_row_ptr(row):CRS_row_ptr(row + 1) - 1)) & ) end do !$OMP end do !$OMP end parallel !<v1.0> ! !$OMP parallel default(shared) private(CRS_id,col) ! !$OMP do reduction(+:new_vector) ! do row = 1, n ! do concurrent(CRS_id=CRS_row_ptr(row):CRS_row_ptr(row+1)-1) ! new_vector(row) = new_vector(row) + CRS_value(CRS_id)*old_vector(CRS_col(CRS_id)) ! enddo ! enddo ! !$OMP end do ! !$OMP end parallel ! end function ! ################################################################### !function crs_opencl_matvec(CRS_row_ptr,CRS_col,CRS_value,old_vector) result(new_vector) ! integer(int32), intent(in) :: CRS_row_ptr(:),CRS_col(:) ! real(real64),intent(in) :: CRS_value(:), old_vector(:) ! ! !end function ! ################################################################### function crs_matvec_generic_complex_SparseClass(CRS_value, CRS_col, CRS_row_ptr, old_vector) result(new_vector) complex(real64), intent(in) :: CRS_value(:), Old_vector(:) integeR(int32), intent(in):: CRS_col(:) integeR(int64), intent(in):: CRS_row_ptr(:) complex(real64), allocatable :: new_vector(:) integer(int32) :: i, j, n, gid, lid, row, col integer(int64) :: CRS_id !> x_i = A_ij b_j n = size(CRS_row_ptr) - 1 !if (size(old_vector) /= n) then ! print *, "ERROR crs_matvec :: inconsistent size for old_vector" ! return !end if new_vector = zeros(n) ! accerelation !$OMP parallel default(shared) private(CRS_id,row,col) !$OMP do reduction(+:new_vector) do row = 1, n do CRS_id = CRS_row_ptr(row), CRS_row_ptr(row + 1) - 1 new_vector(row) = new_vector(row) + CRS_value(CRS_id)*old_vector(CRS_col(CRS_id)) end do end do !$OMP end do !$OMP end parallel ! !$OMP parallel do default(shared) private(CRS_id,col) ! do row = 1 , n ! do CRS_id = CRS_row_ptr(row), CRS_row_ptr(row+1)-1 ! col = CRS_col(CRS_id) ! !$OMP atomic ! new_vector(row) = new_vector(row) + CRS_value(CRS_id)*old_vector(col) ! enddo ! enddo ! !$OMP end parallel do end function ! ################################################################### !function crs_opencl_matvec(CRS_row_ptr,CRS_col,CRS_value,old_vector) result(new_vector) ! integer(int32), intent(in) :: CRS_row_ptr(:),CRS_col(:) ! real(real64),intent(in) :: CRS_value(:), old_vector(:) ! ! !end function ! ################################################################### subroutine eigCRS(this, Eigen_vectors, eigen_values) class(CRS_), intent(in) :: this real(real64), allocatable :: A(:, :), V(:, :) real(real64), allocatable :: D(:), E(:) real(real64), allocatable :: WORK(:) integer(int32), allocatable :: IWORK(:), IFAIL(:) real(real64), allocatable, intent(inout) :: Eigen_vectors(:, :), eigen_values(:) integer(int32) :: num_eigen, INFO real(real64) :: VL, VU, ABSTOL integer(int32):: IL, IU character(1) :: JOBZ = "V" character(1) :: RANGE = "A" call this%Lanczos(DiagonalVector=D, subDiagonalVector=E, V=V) !call print("--") !call print(V) num_eigen = size(D) eigen_values = zeros(size(D)) eigen_vectors = zeros(size(D), num_eigen) WORK = zeros(5*size(D)) IWORK = int(zeros(5*size(D))) IFAIL = int(zeros(size(D))) ABSTOL = dble(1.0e-14) ! LAPACK call DSTEVX(JOBZ, RANGE, size(D), & D, E, VL, VU, IL, IU, ABSTOL, & num_eigen, eigen_values, eigen_vectors, size(D), & WORK, IWORK, IFAIL, INFO) !eigen_Vectors = matmul(V,matmul(eigen_values, transpose(V) ) ) eigen_Vectors = matmul(V, eigen_vectors) do i_i = 1, 3 eigen_vectors(:, i_i) = eigen_vectors(:, i_i)/norm(eigen_vectors(:, i_i)) eigen_vectors(:, i_i) = eigen_vectors(:, i_i)/eigen_vectors(3, i_i) end do end subroutine subroutine initCRS(this, val, col_idx, row_ptr) class(CRS_), intent(inout) :: this integer(int32), intent(in) :: col_idx(:) integer(int64), intent(in) :: row_ptr(:) real(real64), intent(in) :: val(:) this%val = val this%col_idx = col_idx this%row_ptr = row_ptr end subroutine function to_denseCRS(this) result(dense_mat) class(CRS_), intent(in) :: this real(real64), allocatable :: dense_mat(:, :) integer(int32) :: i, n, m, row, col integeR(int64) :: j n = size(this%row_ptr) - 1 m = maxval(this%col_idx) allocate (dense_mat(n, m)) dense_mat(:, :) = 0.0d0 do row = 1, n do j = this%row_ptr(row), this%row_ptr(row + 1) - 1 col = this%col_idx(j) dense_mat(row, col) = this%val(j) end do end do end function pure function addCRS_and_CRS(CRS1, CRS2) result(CRS_ret) type(CRS_), intent(in) :: CRS1, CRS2 type(CRS_) :: CRS_ret integer(int32) :: i, j, row integer(int64) :: col_2, col_1 ! sum : CRS_ret = CRS1 + CRS2 CRS_ret = CRS1 ! ignore fill-in !!$OMP parallel do private(col_2,col_1) do row = 1, size(CRS2%row_ptr) - 1 ! for each row do col_2 = CRS2%row_ptr(row), CRS2%row_ptr(row + 1) - 1 ! search same col do col_1 = CRS1%row_ptr(row), CRS1%row_ptr(row + 1) - 1 if (CRS1%col_idx(col_1) == CRS2%col_idx(col_2)) then !!$OMP atomic CRS_ret%val(col_1) = CRS_ret%val(col_1) + CRS2%val(col_2) exit end if end do end do end do !!$OMP end parallel do end function pure function diffCRS_and_CRS(CRS1, CRS2) result(CRS_ret) type(CRS_), intent(in) :: CRS1, CRS2 type(CRS_) :: CRS_ret integer(int32) :: i, j, row integer(int64) :: col_2, col_1 ! sum : CRS_ret = CRS1 + CRS2 CRS_ret = CRS1 ! ignore fill-in do row = 1, size(CRS2%row_ptr) - 1 ! for each row do col_2 = CRS2%row_ptr(row), CRS2%row_ptr(row + 1) - 1 ! search same col do col_1 = CRS1%row_ptr(row), CRS1%row_ptr(row + 1) - 1 if (CRS1%col_idx(col_1) == CRS2%col_idx(col_2)) then CRS_ret%val(col_1) = CRS_ret%val(col_1) - CRS2%val(col_2) exit end if end do end do end do end function ! ############################################# ! ############################################# function multReal64_and_CRS(scalar64, CRS1) result(CRS_ret) real(real64), intent(in) :: scalar64 type(CRS_), intent(in) :: CRS1 type(CRS_) :: CRS_ret integer(int32) :: i CRS_ret = CRS1 !$OMP parallel do do i = 1, size(CRS_ret%val) CRS_ret%val(i) = scalar64*CRS_ret%val(i) end do !$OMP end parallel do end function ! ############################################# ! ############################################# function multReal64_and_BCRS(scalar64, BCRS1) result(BCRS_ret) real(real64), intent(in) :: scalar64 type(BCRS_), intent(in) :: BCRS1 type(BCRS_) :: BCRS_ret integer(int32) :: i,j BCRS_ret = BCRS1 do i = 1, size(BCRS_ret%CRS,1) do j = 1, size(BCRS_ret%CRS,1) if(.not.allocated(BCRS_ret%CRS(i,j)%val)) cycle BCRS_ret%CRS(i,j) = scalar64*BCRS_ret%CRS(i,j) enddo end do end function ! ############################################# ! ############################################# function multBCRS_and_Real64(BCRS1,scalar64) result(BCRS_ret) real(real64), intent(in) :: scalar64 type(BCRS_), intent(in) :: BCRS1 type(BCRS_) :: BCRS_ret integer(int32) :: i,j BCRS_ret = BCRS1 do i = 1, size(BCRS_ret%CRS,1) do j = 1, size(BCRS_ret%CRS,1) if(.not.allocated(BCRS_ret%CRS(i,j)%val)) cycle BCRS_ret%CRS(i,j) = scalar64*BCRS_ret%CRS(i,j) enddo end do end function ! ############################################# ! ############################################# function multCRS_and_Real64(CRS1, scalar64) result(CRS_ret) type(CRS_), intent(in) :: CRS1 real(real64), intent(in) :: scalar64 type(CRS_) :: CRS_ret integer(int32)::i CRS_ret = CRS1 !$OMP parallel do do i = 1, size(CRS_ret%val) CRS_ret%val = scalar64*CRS_ret%val end do !$OMP end parallel do end function pure function DOFCOO(this) result(Degree_of_freedom) class(COO_), intent(in) :: this integer(int32) :: Degree_of_freedom if (allocated(this%row)) then Degree_of_freedom = size(this%row) else Degree_of_freedom = 0 end if end function pure function DOFCRS(this) result(Degree_of_freedom) class(CRS_), intent(in) :: this integer(int32) :: Degree_of_freedom if (allocated(this%row_ptr)) then Degree_of_freedom = size(this%row_ptr) - 1 else Degree_of_freedom = 0 end if end function ! ####################################################### pure function getCOO(this, row, col) result(ret) class(COO_), intent(in) :: this integer(int32), intent(in) :: row, col real(real64) :: ret integer(int32) :: i ret = 0.0d0 if (.not. allocated(this%row(row)%col)) then return end if do i = 1, size(this%row(row)%col, 1) if (this%row(row)%col(i) == col) then ret = this%row(row)%val(i) end if end do end function pure function neCOO(this) result(ret) class(COO_), intent(in) :: this real(real64) :: ret integer(int32) :: i ! get number of entity ret = 0.0d0 do i = 1, size(this%row) if (allocated(this%row(i)%col)) then ret = ret + size(this%row(i)%col) end if end do end function pure function maxvalCOO(this) result(ret) class(COO_), intent(in) :: this real(real64) :: ret real(real64), allocatable :: val(:) integer(int32) :: i, itr ! get number of entity ret = 0.0d0 itr = 0 do i = 1, size(this%row) if (allocated(this%row(i)%col)) then if (itr == 0) then val = this%row(i)%val(:) ret = maxval(val) itr = itr + 1 else val = this%row(i)%val(:) if (ret < maxval(val)) then ret = maxval(val) else cycle end if end if end if end do end function ! ################################################## function sizeCRS(this) result(n) class(CRS_), intent(in) :: this integer(int32) :: n if (allocated(this%row_ptr)) then n = size(this%row_ptr) - 1 else n = 0 end if end function ! ################################################## ! ################################################## function shapeCRS(this) result(ret) class(CRS_), intent(in) :: this integer(int32) :: ret(1:2) if(.not.allocated(this))then ret(:) = 0 return endif if (allocated(this%row_ptr)) then ret(1) = size(this%row_ptr) - 1 else ret(1) = 0 end if ret(2) = maxval(this%col_idx) end function ! ################################################## ! ################################################## subroutine updateCRS(this, row, col, val) class(CRS_), intent(inout) :: this integer(int32), intent(in) :: row, col real(real64), intent(in) :: val integer(int64) :: i, j ! update but ignore fill-in if (col > maxval(this%col_idx(this%row_ptr(row):this%row_ptr(row + 1) - 1))) return if (col < minval(this%col_idx(this%row_ptr(row):this%row_ptr(row + 1) - 1))) return do i = this%row_ptr(row), this%row_ptr(row + 1) - 1 if (this%col_idx(i) == col) then this%val(i) = val return end if end do end subroutine ! ################################################## ! ################################################## subroutine addCRS(this, row, col, val) class(CRS_), intent(inout) :: this integer(int32), intent(in) :: row, col real(real64), intent(in) :: val integer(int64) :: i, j ! update but ignore fill-in if (col > maxval(this%col_idx(this%row_ptr(row):this%row_ptr(row + 1) - 1))) return if (col < minval(this%col_idx(this%row_ptr(row):this%row_ptr(row + 1) - 1))) return do i = this%row_ptr(row), this%row_ptr(row + 1) - 1 if (this%col_idx(i) == col) then this%val(i) = this%val(i) + val return end if end do end subroutine ! ################################################## ! ################################################## function getCRS(this, row, col) result(val) class(CRS_), intent(in) :: this integer(int32), intent(in) :: row, col real(real64) :: val integer(int64) :: i ! update but ignore fill-in val = 0.0d0 if (col > maxval(this%col_idx(this%row_ptr(row):this%row_ptr(row + 1) - 1))) return if (col < minval(this%col_idx(this%row_ptr(row):this%row_ptr(row + 1) - 1))) return do i = this%row_ptr(row), this%row_ptr(row + 1) - 1 if (this%col_idx(i) == col) then val = this%val(i) return end if end do end function ! ################################################## ! ################################################## logical function is_nonzeroCRS(this, row, col) class(CRS_), intent(in) :: this integer(int32), intent(in) :: row, col real(real64) :: val integer(int64) :: i is_nonzeroCRS = .false. if (row > this%size()) return !if (col > maxval(this%col_idx(this%row_ptr(row):this%row_ptr(row+1)-1)) ) return !if (col < minval(this%col_idx(this%row_ptr(row):this%row_ptr(row+1)-1)) ) return do i = this%row_ptr(row), this%row_ptr(row + 1) - 1 if (this%col_idx(i) == col) then is_nonzeroCRS = .true. return end if end do end function ! ################################################## function lumpedCRS(this) result(ret) class(CRS_), intent(in) :: this real(real64), allocatable :: ret(:) ret = this%diag(cell_centered=.true.) end function ! ################################################## function diagCRS(this, cell_centered) result(diag_vec) class(CRS_), intent(in) :: this real(real64), allocatable :: diag_vec(:) logical, optional, intent(in) :: cell_centered integeR(int64) :: i, j if (present(cell_centered)) then if (cell_centered) then diag_vec = zeros(this%size()) do i = 1, this%size() ! for rows do j = this%row_ptr(i), this%row_ptr(i + 1) - 1 ! for columns diag_vec(i) = diag_vec(i) + this%val(j) end do end do return end if end if diag_vec = zeros(this%size()) do i = 1, this%size() do j = this%row_ptr(i), this%row_ptr(i + 1) - 1 if (this%col_idx(j) == i) then diag_vec(i) = this%val(j) exit end if end do end do end function ! ################################################## recursive subroutine ILUCRS(this, fill_in_order, RHS, debug) class(CRS_), intent(inout) :: this integer(int32), intent(in) :: fill_in_order ! real(real64), optional, intent(inout) :: RHS(:) ! RHS vector real(real64), allocatable :: diag_vec(:) integer(int32), allocatable :: col_line(:), range_col(:, :) type(CCS_) :: ccs logical, optional, intent(in) :: debug real(real64) :: A_k_j, A_ik integer(int32) :: i, k, l, n, m, row, col, col_idx, row_idx, kk, jj integer(int64) :: j integer(int32), allocatable :: nonzero_k_list(:), nonzero_j_list(:) logical :: debug_mode_on = .false. if (present(debug)) then debug_mode_on = debug end if if (present(RHS)) then ! Given: Ax = y ! A = LU call this%ILU(fill_in_order) ! Forward substitution do i = 2, this%size() do j = this%row_ptr(i), this%row_ptr(i + 1) - 1 if (this%col_idx(j) < i) then ! execute Forward substitution !print *, i, this%col_idx(j),this%val(j) RHS(i) = RHS(i) - this%val(j)*RHS(this%col_idx(j)) end if end do end do ! Backward substitution diag_vec = this%diag() do i = this%size(), 1, -1 if (diag_vec(i) == 0.0d0) cycle RHS(i) = RHS(i)/diag_vec(i) end do ! Forward substitution do i = this%size(), 1, -1 do j = this%row_ptr(i), this%row_ptr(i + 1) - 1 if (this%col_idx(j) > i) then ! execute Forward substitution RHS(i) = RHS(i) - this%val(j)/diag_vec(i)*RHS(this%col_idx(j)) end if end do end do return end if select case (fill_in_order) case default print *, "fill-in order", fill_in_order, "is not implemented. Try 0" stop case (0) n = this%size() if (debug_mode_on) then print *, "[ILU(0)] >> started" end if diag_vec = this%diag() range_col = zeros(n, 2) !!$OMP parallel do !do row=1,n ! range_col(row,1) = minval(this%col_idx(this%row_ptr(row):this%row_ptr(row+1)-1)) ! range_col(row,2) = maxval(this%col_idx(this%row_ptr(row):this%row_ptr(row+1)-1)) !enddo !!$OMP end parallel do do i = 2, n if (debug_mode_on) then print *, "[ILU(0)] >> U", i, "/", n end if ! >>>>>>> slow nonzero_k_list = this%col_idx(this%row_ptr(i):this%row_ptr(i + 1) - 1) !! >> notice!! !! each col should be sorted. !do k = range_col(i,1) , i-1 do kk = 1, size(nonzero_k_list) k = nonzero_k_list(kk) if (k >= i) cycle A_ik = this%get(i, k)/diag_vec(k) call this%update(row=i, col=k, val=A_ik) !$OMP parallel default(shared) private(j) !$OMP do do jj = 1, size(nonzero_k_list) j = nonzero_k_list(jj) if (j < k + 1) cycle call this%add(row=int(i), col=int(j), val=-A_ik*this%get(int(k), int(j))) end do !$OMP end do !$OMP end parallel diag_vec(i) = this%get(int(i), int(i)) end do end do ! <<<<<<<<< slow ! !! ccs !ccs = this%to_CCS() ! [5, 6, 7] ! [2, 8, 9] ! [3, 4, 10] ! [ 5, 6, 7] ! [10/5, 20, 23] ! [15/5, 50, 67] !! ! 高速バージョン ! do i=2,n ! if(debug_mode_on)then ! print *, "[ILU(0)] >> U",i,"/",n ! endif ! ! >>>>>>> slow ! k = i-1 ! do row_idx = ccs%col_ptr(i-1),ccs%col_ptr(i)-1 ! if(ccs%row_idx(row_idx) < i ) cycle ! if(i==3 ) then ! print *,"dbg",ccs%row_idx(row_idx),i-1,ccs%val(row_idx), diag_vec(i-1) ! ! endif ! ! ccs%val(row_idx) = ccs%val(row_idx)/diag_vec(i-1) ! if(ccs%row_idx(row_idx) == i ) then ! A_ik = ccs%val(row_idx) ! endif ! enddo ! if(i==3 ) stop ! ! do j = this%row_ptr(i),this%row_ptr(i+1)-1 ! if(this%col_idx(j) < i ) then ! cycle ! elseif(this%col_idx(j) == i ) then ! diag_vec(i) = diag_vec(i) - A_ik*this%get( k,i ) ! call this%update(i,i,diag_vec(i) ) ! else ! call this%update(row=i,col= this%col_idx(j) ,& ! val=this%get(i,this%col_idx(j) ) - A_ik*this%get(k,this%col_idx(j) ) ) ! endif ! enddo ! ! enddo ! ! [ 5, 6, 7] ! ! [10/5, 20, 23] ! ! [15/5, 50, 67] ! call this%load(ccs=ccs,position="L") ! return ! end select end subroutine ! ################################################ subroutine ILU_matvecCRS(this, old_vector, new_vector) class(CRS_), intent(in) :: this ! ILU factorlized matrix integer(int64) :: i, j real(real64), intent(in) :: old_vector(:) real(real64), allocatable, intent(inout) :: new_vector(:) real(real64), allocatable :: diag_vec(:) real(real64) :: new_vector_i new_vector = old_vector ! Forward substitution do i = 2, this%size() new_vector_i = new_vector(i) !$OMP parallel !$OMP do reduction(+:new_vector_i) do j = this%row_ptr(i), this%row_ptr(i + 1) - 1 if (this%col_idx(j) < i) then ! execute Forward substitution new_vector_i = new_vector_i - this%val(j)*new_vector(this%col_idx(j)) end if end do !$OMP end do !$OMP end parallel new_vector(i) = new_vector_i end do ! Backward substitution diag_vec = this%diag() !$OMP parallel !$OMP do do i = this%size(), 1, -1 if (diag_vec(i) == 0.0d0) then diag_vec(i) = 1.0d0 end if new_vector(i) = new_vector(i)/diag_vec(i) end do !$OMP end do !$OMP end parallel ! Forward substitution do i = this%size(), 1, -1 new_vector_i = new_vector(i) !$OMP parallel !$OMP do reduction(+:new_vector_i) do j = this%row_ptr(i), this%row_ptr(i + 1) - 1 if (this%col_idx(j) > i) then ! execute Forward substitution new_vector(i) = new_vector(i) - this%val(j)/diag_vec(i)*new_vector(this%col_idx(j)) end if end do !$OMP end do !$OMP end parallel new_vector(i) = new_vector_i end do end subroutine ! ################################################## function to_CCSCRS(this) result(CCS) class(CRS_), intent(in) :: this type(CCS_) :: CCS integer(int64) :: i, j, n, col integer(int32), allocatable :: inst_counter(:) inst_counter = int(zeros(this%size())) CCS%col_ptr = int(zeros(size(this%row_ptr))) CCS%row_idx = int(zeros(size(this%col_idx))) CCS%val = zeros(size(this%val)) do i = 1, size(this%col_idx) CCS%col_ptr(this%col_idx(i)) = CCS%col_ptr(this%col_idx(i)) + 1 end do ![2,3,3, 3, 2, 0] do i = 1, size(CCS%col_ptr) - 1 CCS%col_ptr(i + 1) = CCS%col_ptr(i + 1) + CCS%col_ptr(i) end do ! [2,3,3, 3, 2, 0] !>[2,5,8,11,13, 13] do i = size(CCS%col_ptr), 2, -1 CCS%col_ptr(i) = CCS%col_ptr(i - 1) end do ! [2,5,8,11,13, 13] !>[2,2,5, 8,11, 13] CCS%col_ptr(:) = CCS%col_ptr(:) + 1 ! [2,2,5, 8,11, 13] !>[2,3,6, 9,12, 14] CCS%col_ptr(1) = 1 ! [2,3,6, 9,12, 14] !>[1,3,6, 9,12, 14] do i = 1, size(this%row_ptr) - 1 do j = this%row_ptr(i), this%row_ptr(i + 1) - 1 col = this%col_idx(j) CCS%row_idx(CCS%col_ptr(col) + inst_counter(col)) = i CCS%val(CCS%col_ptr(col) + inst_counter(col)) = this%val(j) inst_counter(col) = inst_counter(col) + 1 end do end do end function !#################################################### subroutine loadCRS(this, CCS, Position) class(CRS_), intent(inout) :: this type(CCS_), optional, intent(in) :: CCS character(*), optional, intent(in) :: Position integer(int32) :: i, j if (present(CCS)) then if (Position == "L") then print *, CCS%col_ptr print *, CCS%row_idx print *, CCS%val do i = 1, size(CCS%col_ptr) - 1 do j = ccs%col_ptr(i), ccs%col_ptr(i + 1) - 1 if (ccs%row_idx(j) <= i) then cycle else print *, ccs%row_idx(j), i call this%update(row=ccs%row_idx(j), col=i, val=ccs%val(j)) end if end do end do end if end if end subroutine subroutine eyesCOO(this, n) class(COO_) :: this integer(int32), intent(in) :: n integer(int32) :: i call this%init(n) do i = 1, n call this%set(i, i, 1.0d0) end do end subroutine subroutine poissonCOO(this, n) class(COO_) :: this integer(int32), intent(in) :: n integer(int32) :: i call this%init(n) call this%set(1, 1, -2.0d0) call this%set(1, 1 + 1, 1.0d0) do i = 2, n - 1 call this%set(i, i - 1, 1.0d0) call this%set(i, i, -4.0d0) call this%set(i, i + 1, 1.0d0) end do call this%set(n, n - 1, 1.0d0) call this%set(n, n, -2.0d0) end subroutine subroutine LOBPCG_single_CRS(A, B, lambda, X, alpha, tol, debug) type(CRS_), intent(in) :: A, B real(real64), allocatable, intent(inout) :: X(:) real(real64), allocatable, intent(inout) :: lambda real(real64), intent(in) :: alpha logical, optional, intent(in) :: debug logical :: debug_mode type(Random_) :: random real(real64), allocatable :: r(:), rho integer(int32) :: n, i integer(int32) :: MAX_ITR = 1000000 real(real64), intent(in) :: tol debug_mode = input(default=.false., option=debug) ! BLOPEX IN HYPRE AND PETSC, SIAM Eq. (2.2) ! number of eigen value :: m ! initialize X and lambda n = A%size() X = 2.0d0*random%randn(n) - 1.0d0 lambda = 0.0d0 !Single-vector version do i = 1, MAX_ITR rho = dot_product(x, A%matmul(x))/dot_product(x, B%matmul(x)) r = A%matmul(x) - rho*B%matmul(x) x = x - alpha*r if (debug_mode) then print *, i, norm(r) end if if (norm(r) < tol) then if (debug_mode) then print *, "[OK] converged." end if exit else cycle end if end do if (i == MAX_ITR) print *, "[ERROR] LOBPCG NOT CONVERGED." lambda = rho end subroutine subroutine LOBPCG_CRS(A, B, lambda, X, m, MAX_ITR, TOL, debug) type(CRS_), intent(in) :: A, B real(real64), allocatable, intent(out) :: X(:, :) real(real64), allocatable, intent(out) :: lambda(:) real(real64), intent(in) :: TOL integer(int32), intent(in) :: m, MAX_ITR logical, optional, intent(in) :: debug logical :: debug_mode type(Random_) :: random real(real64), allocatable :: r(:, :), p(:, :), V(:, :), A_small(:, :), AV(:, :), & b_small(:, :), lambda_small(:), XAX(:, :), AX(:, :), OR(:, :), BX(:, :), XBX(:, :), & Bm_small(:, :), BV(:, :) real(real64) :: initial_R integer(int32) :: n, i, j, k !integer(int32) :: MAX_ITR = 1000000 ! References: ! Knyazev, A. V., Argentati, M. E., Lashuk, I. & Ovtchinnikov, E. E. Block Locally Optimal Preconditioned Eigenvalue Xolvers (BLOPEX) in hypre and PETSc. SIAM J. Sci. Comput. 29, 2224–2239 (2007). ! https://en.wikipedia.org/wiki/LOBPCG ! https://webpark1378.sakura.ne.jp/nagai/note_141009_LOBPCG.pdf debug_mode = input(default=.false., option=debug) ! BLOPEX IN HYPRE AND PETSC, SIAM Eq. (2.2) ! number of eigen value :: m ! debug ! initialize X and lambda n = A%size() !X = random%randn(n,m) X = random%randn(n, m)! + random%randn(n,m) !+ random%randn(n,m) do i = 1, m X(:, i) = X(:, i)/norm(X(:, i)) end do call GramSchmidt(X, size(X, 1), size(X, 2), X) lambda = zeros(m) R = zeros(n, m) P = zeros(n, m) AV = zeros(n, 3*m) AX = zeros(n, m) A_small = zeros(3*m, 3*m) V = zeros(n, 3*m) ! m x m 固有値問題 do i = 1, m AX(:, i) = A%matmul(X(:, i)) end do BX = zeros(size(AX, 1), size(AX, 2)) do i = 1, m BX(:, i) = B%matmul(X(:, i)) end do XAX = matmul(transpose(X(:, 1:m)), AX) XBX = matmul(transpose(X(:, 1:m)), BX) call LAPACK_EIG(XAX, XBX, b_small, lambda_small) X = matmul(X, b_small) do i = 1, m AX(:, i) = A%matmul(X(:, i)) end do BX = zeros(size(AX, 1), size(AX, 2)) do i = 1, m BX(:, i) = B%matmul(X(:, i)) end do do i = 1, m R(:, i) = AX(:, i) - BX(:, i)*lambda_small(i) end do V = zeros(n, 2*m) V(:, 1:m) = X(:, :) V(:, m + 1:2*m) = R(:, :) call GramSchmidt(V, size(V, 1), size(V, 2), V) do k = 1, size(X, 2) V(:, k) = V(:, k)/norm(V(:, k)) end do X(:, :) = V(:, 1:m) R(:, :) = V(:, m + 1:2*m) AV = zeros(n, 2*m) do j = 1, 2*m ! n x n, n x 2m AV(:, j) = A%matmul(V(:, j)) end do BV = zeros(n, 2*m) do j = 1, 2*m ! n x n, n x 2m BV(:, j) = B%matmul(V(:, j)) end do A_small = matmul(transpose(V), AV) Bm_small = matmul(transpose(V), BV) call LAPACK_EIG(A_small, Bm_small, b_small, lambda_small) do j = 1, m X(:, j) = matmul(V, b_small(:, j)) end do AX = zeros(n, m) do i = 1, m AX(:, i) = A%matmul(X(:, i)) end do BX = zeros(size(AX, 1), size(AX, 2)) do j = 1, m BX(:, j) = B%matmul(X(:, j)) end do R = zeros(n, m) do j = 1, m R(:, j) = AX(:, j) - BX(:, j)*lambda_small(j) end do OR = zeros(n, 2*m) OR(:, 1:m) = 0.0d0 OR(:, m + 1:2*m) = R(:, :) P = zeros(n, m) do i = 1, m P(:, i) = matmul(OR, b_small(:, i)) end do V = zeros(n, 3*m) AV = zeros(n, 3*m) BV = zeros(n, 3*m) AX = zeros(n, m) BX = zeros(n, m) do i = 1, MAX_ITR V(:, 1:m) = X(:, :) V(:, m + 1:2*m) = R(:, :) V(:, 2*m + 1:3*m) = P(:, :) ! Gram-Scmidtを計算する. call GramSchmidt(V, size(V, 1), size(V, 2), V) do k = 1, size(X, 2) V(:, k) = V(:, k)/norm(V(:, k)) end do do k = size(X, 2) + size(R, 2) + 1, size(V, 2) V(:, k) = V(:, k)/norm(V(:, k)) end do X(:, :) = V(:, 1:m) R(:, :) = V(:, m + 1:2*m) P(:, :) = V(:, 2*m + 1:3*m) !$OMP parallel do do j = 1, 3*m AV(:, j) = A%matmul(V(:, j)) end do !$OMP end parallel do !$OMP parallel do do j = 1, 3*m BV(:, j) = B%matmul(V(:, j)) end do !$OMP end parallel do A_small = matmul(transpose(V), AV) Bm_small = matmul(transpose(V), BV) ! LAPACKの固有値求めるやつを呼ぶ call LAPACK_EIG(A_small, Bm_small, b_small, lambda_small) X = matmul(V, b_small(:, 1:m)) !$OMP parallel do do j = 1, m AX(:, j) = A%matmul(X(:, j)) end do !$OMP end parallel do !$OMP parallel do do j = 1, m BX(:, j) = B%matmul(X(:, j)) end do !$OMP end parallel do !$OMP parallel do do j = 1, m R(:, j) = AX(:, j) - BX(:, j)*lambda_small(j) end do !$OMP end parallel do V(:, 1:m) = 0.0d0 ! matmul:: (n,3*m) x (n,3*m) P = matmul(V, b_small(:, 1:m)) ! Detect convergence if (i == 1) then initial_R = maxval(abs(R)) end if if (debug_mode) then print *, i, maxval(abs(R))/initial_R end if if (maxval(abs(R))/initial_R < tol) then if (debug_mode) then print *, "[OK] converged." end if lambda = lambda_small(1:m) exit else cycle end if end do if (i == MAX_ITR) print *, "[ERROR] LOBPCG NOT CONVERGED." end subroutine ! ################################################# ! ################################################# subroutine GramSchmidt(mat_v, m, n, mat_v_out) integer, intent(in)::m, n real(real64), intent(in)::mat_v(1:m, 1:n) real(real64), intent(inout)::mat_v_out(1:m, 1:n) integer::i, j real(real64) :: nai !real(real64) :: v(1:m),vi(1:m),viold(1:m) real(real64), allocatable :: v(:), vi(:) real(real64), allocatable :: viold(:) real(real64)::norm_v mat_v_out = mat_v allocate (viold(1:m)) allocate (v(1:m)) allocate (vi(1:m)) do i = 1, n viold = mat_v_out(:, i) if (dot_product(viold, viold) == 0.0d0) cycle do j = 1, i - 1 nai = dot_product(mat_v_out(1:m, j), viold(1:m)) vi = viold - nai*mat_v_out(:, j) viold = vi end do norm_v = sqrt(dble(dot_product(viold, viold))) mat_v_out(:, j) = viold/norm_v end do end subroutine ! ################################################# ! ################################################# subroutine LAPACK_EIG_DENSE(A, B, x, lambda, debug) real(real64), intent(in) :: A(:, :), B(:, :) real(real64), allocatable, intent(inout):: x(:, :), lambda(:) logical, optional, intent(in) :: debug logical :: debug_mode !>>>>>>>>>>>>>> INPUT integer(int32) :: ITYPE = 1 ! A*x = (lambda)*B*x character(1) :: JOBZ = 'V' ! Compute eigenvalues and eigenvectors. character(1) :: UPLO = 'U' ! Upper triangles of A and B are stored; !<<<<<<<<<<<<<< INPUT integer(int32) :: N ! order of matrix real(real64), allocatable :: AP(:) real(real64), allocatable :: BP(:) real(real64), allocatable :: W(:) real(real64), allocatable :: Z(:, :), M(:) real(real64), allocatable :: WORK(:), ID(:) integer(int32), allocatable :: IWORK(:), IDS(:) integer(int32) :: LDZ integer(int32) :: LWORK integer(int32) :: LIWORK integer(int32) :: INFO integer(int32) :: from, to, k, j, i integer(int32), allocatable :: new_id_from_old_id(:) real(real64), allocatable :: dense_mat(:, :) !logical :: use_lanczos type(CRS_) :: crs debug_mode = input(default=.false., option=debug) !>>>>>>>>>>>>>> INPUT N = size(A, 1) LDZ = N LWORK = 1 + 6*N + 2*N**2 LIWORK = 3 + 5*N !<<<<<<<<<<<<<< INPUT !>>>>>>>>>>>>>> INPUT/OUTPUT AP = zeros(N*(N + 1)/2) BP = zeros(N*(N + 1)/2) ! Upper triangle matrix AP = to_UpperTriangle(A)!UpperTriangularMatrix(CRS_val=this%A_CRS_val,CRS_col=this%A_CRS_index_col,& !CRS_rowptr=this%A_CRS_index_row) BP = to_UpperTriangle(B)!UpperTriangularMatrix(CRS_val=this%B_CRS_val,CRS_col=this%B_CRS_index_col,& !CRS_rowptr=this%B_CRS_index_row) !<<<<<<<<<<<<<< INPUT/OUTPUT !>>>>>>>>>>>>>> INPUT N = size(A, 1) LDZ = N LWORK = 1 + 6*N + 2*N**2 LIWORK = 3 + 5*N !<<<<<<<<<<<<<< INPUT !>>>>>>>>>>>>>> OUTPUT W = zeros(N) Z = zeros(LDZ, N) WORK = zeros(LWORK) IWORK = zeros(LIWORK) INFO = 0 !<<<<<<<<<<<<<< OUTPUT if (debug_mode) then print *, ">> Solver :: LAPACK/DSPGVD" end if call DSPGVD(ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, & LWORK, IWORK, LIWORK, INFO) X = Z lambda = W end subroutine ! ################################################# ! ################################################# subroutine LAPACK_EIG_SPARSE(A, B, x, lambda, debug) type(CRS_), intent(in) :: A, B real(real64), allocatable :: Ad(:, :), Bd(:, :) real(real64), allocatable, intent(inout):: x(:, :), lambda(:) logical, optional, intent(in) :: debug logical :: debug_mode !>>>>>>>>>>>>>> INPUT integer(int32) :: ITYPE = 1 ! A*x = (lambda)*B*x character(1) :: JOBZ = 'V' ! Compute eigenvalues and eigenvectors. character(1) :: UPLO = 'U' ! Upper triangles of A and B are stored; !<<<<<<<<<<<<<< INPUT integer(int32) :: N ! order of matrix real(real64), allocatable :: AP(:) real(real64), allocatable :: BP(:) real(real64), allocatable :: W(:) real(real64), allocatable :: Z(:, :), M(:) real(real64), allocatable :: WORK(:), ID(:) integer(int32), allocatable :: IWORK(:), IDS(:) integer(int32) :: LDZ integer(int32) :: LWORK integer(int32) :: LIWORK integer(int32) :: INFO integer(int32) :: from, to, k, j, i integer(int32), allocatable :: new_id_from_old_id(:) real(real64), allocatable :: dense_mat(:, :) !logical :: use_lanczos type(CRS_) :: crs Ad = A%to_dense() Bd = B%to_dense() debug_mode = input(default=.false., option=debug) !>>>>>>>>>>>>>> INPUT N = size(Ad, 1) LDZ = N LWORK = 1 + 6*N + 2*N**2 LIWORK = 3 + 5*N !<<<<<<<<<<<<<< INPUT !>>>>>>>>>>>>>> INPUT/OUTPUT AP = zeros(N*(N + 1)/2) BP = zeros(N*(N + 1)/2) ! Upper triangle matrix AP = to_UpperTriangle(Ad)!UpperTriangularMatrix(CRS_val=this%A_CRS_val,CRS_col=this%A_CRS_index_col,& !CRS_rowptr=this%A_CRS_index_row) BP = to_UpperTriangle(Bd)!UpperTriangularMatrix(CRS_val=this%B_CRS_val,CRS_col=this%B_CRS_index_col,& !CRS_rowptr=this%B_CRS_index_row) !<<<<<<<<<<<<<< INPUT/OUTPUT !>>>>>>>>>>>>>> INPUT N = size(Ad, 1) LDZ = N LWORK = 1 + 6*N + 2*N**2 LIWORK = 3 + 5*N !<<<<<<<<<<<<<< INPUT !>>>>>>>>>>>>>> OUTPUT W = zeros(N) Z = zeros(LDZ, N) WORK = zeros(LWORK) IWORK = zeros(LIWORK) INFO = 0 !<<<<<<<<<<<<<< OUTPUT if (debug_mode) then print *, ">> Solver :: LAPACK/DSPGVD" end if call DSPGVD(ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, & LWORK, IWORK, LIWORK, INFO) X = Z lambda = W end subroutine ! ################################################# function to_UpperTriangle(A) result(ret) real(real64), intent(in) :: A(:, :) real(real64), allocatable :: ret(:) integer(int32) :: i, j, n, m n = size(A, 1) ret = zeros(n*(n + 1)/2) m = 0 do i = 1, size(A, 2) do j = 1, i m = m + 1 ret(m) = A(j, i) end do end do end function ! ################################################# subroutine BICGSTAB_CRSSparse(this, x, b, debug, tol) class(CRS_), intent(inout) :: this real(real64), allocatable, intent(inout) :: x(:) logical, optional, intent(in) :: debug real(real64), optional, intent(in) :: tol real(real64) :: er real(real64), intent(in) :: b(:) if (.not. allocated(x)) then x = zeros(size(b)) end if er = dble(1.0e-14) if (present(tol)) then er = tol end if call bicgstab_CRS_SparseClass( & a=this%val, & ptr_i=this%row_ptr, & index_j=this%col_idx, & x=x, & b=b, & itrmax=1000000, & er=er, & relative_er=er, & debug=debug & ) end subroutine ! ################################################# ! ##################################################### subroutine bicgstab_CRS_SparseClass(a, ptr_i, index_j, x, b, itrmax, er, relative_er, debug) integer(int64), intent(in) :: ptr_i(:) integer(int32), intent(in) :: index_j(:) integer(int32), intent(in) :: itrmax real(real64), intent(in) :: a(:) real(real64), intent(in) :: b(:) real(real64), intent(in) :: er real(real64), optional, intent(in) :: relative_er real(real64), intent(inout) :: x(:) logical, optional, intent(in) :: debug logical :: speak = .false. integer(int32) itr, i, j, n real(real64) alp, bet, c1, c2, c3, ev, vv, rr, er0, init_rr, re_er0 real(real64), allocatable:: r(:), r0(:), p(:), y(:), e(:), v(:), pa(:), ax(:) er0 = er if (present(debug)) then speak = debug end if n = size(b) if (speak) print *, "BiCGSTAB STARTED >> DOF:", n allocate (r(n), r0(n), p(n), y(n), e(n), v(n)) r(:) = b(:) if (speak) print *, "BiCGSTAB >> [1] initialize" call SpMV_CRS_Sparse(CRS_value=a, CRS_col=index_j, & CRS_row_ptr=ptr_i, old_vector=x, new_vector=ax) r = b - ax if (speak) print *, "BiCGSTAB >> [2] dp1" c1 = dot_product(r, r) !call omp_dot_product(r,r,c1) init_rr = c1 if (speak) print *, "BiCGSTAB >> [2] init_rr", c1 !if(speak) print *, "BiCGSTAB >> |r|^2 = ",init_rr !if (c1 < er0) return p(:) = r(:) r0(:) = r(:) do itr = 1, itrmax if (speak) print *, "BiCGSTAB >> ["//str(itr)//"] initialize" c1 = dot_product(r0, r) !call omp_dot_product(r0,r,c1) y(:) = 0.0d0 call SpMV_CRS_Sparse(CRS_value=a, CRS_col=index_j, & CRS_row_ptr=ptr_i, old_vector=p, new_vector=y) c2 = dot_product(r0, y) !call omp_dot_product(r0,y,c2) alp = c1/c2 e(:) = r(:) - alp*y(:) v(:) = 0.0d0 call SpMV_CRS_Sparse(CRS_value=a, CRS_col=index_j, & CRS_row_ptr=ptr_i, old_vector=e, new_vector=v) if (speak) print *, "BiCGSTAB >> ["//str(itr)//"] half" ev = dot_product(e, v) vv = dot_product(v, v) !call omp_dot_product(e,v,ev) !call omp_dot_product(v,v,vv) if (vv == 0.0d0) stop "Bicgstab devide by zero" c3 = ev/vv if (speak) print *, "BiCGSTAB >> c3 = ev/vv", c3 x(:) = x(:) + alp*p(:) + c3*e(:) r(:) = e(:) - c3*v(:) rr = dot_product(r, r) !call omp_dot_product(r,r,rr) if (itr == 1) then re_er0 = rr end if if (speak) then print *, sqrt(rr) end if if (present(relative_er)) then if (sqrt(rr/re_er0) < relative_er) then exit end if end if ! write(*,*) 'itr, er =', itr,rr if (sqrt(rr) < er0) exit c1 = dot_product(r0, r) !call omp_dot_product(r0,r,c1) bet = c1/(c2*c3) if ((c2*c3) == 0.0d0) stop "Bicgstab devide by zero" p(:) = r(:) + bet*(p(:) - c3*y(:)) end do end subroutine !=============================================================== subroutine SpMV_CRS_Sparse(CRS_value, CRS_col, CRS_row_ptr, old_vector, new_vector, precondition) real(real64), intent(in) :: CRS_value(:), Old_vector(:) integeR(int32), intent(in):: CRS_col(:) integeR(int64), intent(in):: CRS_row_ptr(:) type(CRS_), optional, intent(in) :: precondition real(real64), allocatable, intent(inout) :: new_vector(:) real(real64), allocatable :: precon_old_vector(:) integer(int32) :: i, j, n, gid, lid, row, col integer(int64) :: CRS_id !> x_i = A_ij b_j n = size(CRS_row_ptr) - 1 !if (size(old_vector) /= n) then ! print *, "ERROR crs_matvec :: inconsistent size for old_vector" ! return !end if if (.not. allocated(new_vector)) then new_vector = zeros(n) else new_vector(:) = 0.0d0 end if if (present(precondition)) then call precondition%ILU_matvec(old_vector=old_vector, new_vector=precon_old_vector) !$OMP parallel do default(shared) private(CRS_id,col) do row = 1, n do CRS_id = CRS_row_ptr(row), CRS_row_ptr(row + 1) - 1 col = CRS_col(CRS_id) !$OMP atomic new_vector(row) = new_vector(row) + CRS_value(CRS_id)*precon_old_vector(col) end do end do !$OMP end parallel do else !$OMP parallel do default(shared) private(CRS_id,col) do row = 1, n do CRS_id = CRS_row_ptr(row), CRS_row_ptr(row + 1) - 1 col = CRS_col(CRS_id) !$OMP atomic new_vector(row) = new_vector(row) + CRS_value(CRS_id)*old_vector(col) end do end do !$OMP end parallel do end if end subroutine ! ################################################################### function divide_by_CRS(this, diag_vector) result(ret_crs) class(CRS_), intent(in) :: this real(real64), intent(in) :: diag_vector(:) type(CRS_) :: ret_crs integer(int64) :: i, j ret_crs = this if (size(diag_vector) == 1) then ret_crs%val(:) = ret_crs%val(:)/diag_vector(1) return end if if (this%size() /= size(diag_vector)) then print *, "ERROR >> divide_by_CRS >> this%size()/=size(diag_vector) " stop end if do i = 1, ret_crs%size() do j = ret_crs%row_ptr(i), ret_crs%row_ptr(i + 1) - 1 ret_crs%val(j) = ret_crs%val(j)/diag_vector(i) end do end do end function ! ################################################################### ! ################################################################### subroutine divide_by_vector_CRS(this, diag_vector) class(CRS_), intent(inout) :: this real(real64), intent(in) :: diag_vector(:) integer(int64) :: i, j if (size(diag_vector) == 1) then this%val(:) = this%val(:)/diag_vector(1) return end if if (this%size() /= size(diag_vector)) then print *, "ERROR >> divide_by_CRS >> this%size()/=size(diag_vector) " stop end if do i = 1, this%size() do j = this%row_ptr(i), this%row_ptr(i + 1) - 1 this%val(j) = this%val(j)/diag_vector(i) end do end do end subroutine ! ################################################################### function mult_by_CRS(this, diag_vector) result(ret_crs) class(CRS_), intent(in) :: this real(real64), intent(in) :: diag_vector(:) type(CRS_) :: ret_crs integer(int64) :: i, j ret_crs = this if (size(diag_vector) == 1) then ret_crs%val(:) = ret_crs%val(:)/diag_vector(1) return end if if (this%size() /= size(diag_vector)) then print *, "ERROR >> divide_by_CRS >> this%size()/=size(diag_vector) " stop end if do i = 1, ret_crs%size() do j = ret_crs%row_ptr(i), ret_crs%row_ptr(i + 1) - 1 ret_crs%val(j) = ret_crs%val(j)*diag_vector(i) end do end do end function ! ###################################################### function matmul_CRS_vec(A, b) result(vec) type(CRS_), intent(in) :: A real(real64), intent(in) :: b(:) real(real64), allocatable :: vec(:) vec = A%matmul(b) end function ! ###################################################### function matmul_CRS_CRS(A, B) result(CRS) type(CRS_), intent(in) :: A, B type(COO_) :: COO type(CRS_) :: CRS type(CCS_) :: B_CCS real(real64), allocatable :: col_vec(:) integer(int32) :: col, row, col_ptr, i, j integer(int32), allocatable :: A_col_idx(:), B_row_idx(:) real(real64), allocatable :: A_col_val(:), B_row_val(:) ! Considerably slow!! DO NOT USE! ! Considerably slow!! DO NOT USE! ! Considerably slow!! DO NOT USE! ! Considerably slow!! DO NOT USE! ! Considerably slow!! DO NOT USE! ! Considerably slow!! DO NOT USE! ! Considerably slow!! DO NOT USE! ! Considerably slow!! DO NOT USE! ! Considerably slow!! DO NOT USE! ! Considerably slow!! DO NOT USE! ! Considerably slow!! DO NOT USE! ! matrix multiplication ! C_ij = A_ik B_kj call COO%init(A%size()) B_CCS = B%to_CCS() ! ! avoiding fill-in (inaccurate) ! if(A%size() > B%size() )then ! CRS = A ! CRS%val(:) = 0.0d0 ! else ! CRS = B ! CRS%val(:) = 0.0d0 ! endif ! ! do row=1,size(CRS%row_ptr)-1 ! do col_ptr=CRS%row_ptr(row),CRS%row_ptr(row+1)-1 ! col = CRS%col_idx(col_ptr) ! A_col_idx = A%col_idx( A%row_ptr(row):A%row_ptr(row+1)-1 ) ! B_row_idx = B_CCS%row_idx( B_CCS%col_ptr(col):B_CCS%col_ptr(col+1)-1 ) ! A_col_val = A%val( A%row_ptr(row):A%row_ptr(row+1)-1 ) ! B_row_val = B_CCS%val( B_CCS%col_ptr(col):B_CCS%col_ptr(col+1)-1 ) ! if(minval(A_col_idx) > maxval(B_row_idx))cycle ! if(maxval(A_col_idx) < minval(B_row_idx))cycle ! do i=1,size(A_col_idx) ! do j=1, size(B_row_idx) ! if(A_col_idx(i)==B_row_idx(j) )then ! CRS%val(col) = CRS%val(col) + A_col_val(i)*B_row_val(j) ! endif ! enddo ! enddo ! enddo ! enddo do col = 1, size(B_CCS%col_ptr) - 1 col_vec = A%matmul(B_CCS%get_column(col)) !!$OMP parallel do do row = 1, size(col_vec) if (col_vec(row) /= 0.0d0) then !!$OMP atomic call COO%set(row, col, col_vec(row)) end if end do !!$OMP end parallel do end do CRS = COO%to_CRS() end function ! ################################################### function get_column_CCS(this, col) result(ret) class(CCS_), intent(in) :: this integer(int32), intent(in) :: col real(real64), allocatable :: ret(:) integer(int32) :: row, n n = size(this%col_ptr, 1) - 1 ret = zeros(n) do row = this%col_ptr(col), this%col_ptr(col + 1) - 1 ret(this%row_idx(row)) = this%val(row) end do end function ! ##################################################### subroutine randomCOO(this, n, percent) class(COO_), intent(inout) :: this integer(int32), intent(in) :: n real(real32), optional, intent(in) :: percent real(real32) :: fill_percent integer(int32) :: m, i, row, col real(real64) :: val type(Random_) :: random fill_percent = input(default=10.0, option=percent) m = maxval([int(dble(n)*dble(n)*dble(percent)/100.0d0), 1]) call this%init(n) do i = 1, m row = int(n*random%random()) + 1 col = int(n*random%random()) + 1 val = random%gauss(mu=0.0d0, sigma=1.0d0) call this%set(row, col, val) call this%set(col, row, val) end do end subroutine ! ##################################################### subroutine eyesCRS(this, n) class(CRS_), intent(inout) :: This type(COO_) :: coo type(CRS_) :: buf integer(int32), intent(in) :: n integer(int32) :: i call coo%init(n) do i = 1, n call coo%set(i, i, 1.0d0) end do buf = coo%to_CRS() this%row_ptr = buf%row_ptr this%col_idx = buf%col_idx this%val = buf%val end subroutine ! ################################################### ! ################################################### subroutine poissonCRS(this, n) class(CRS_),intent(inout) :: this type(COO_) :: coo type(CRS_) :: buf integer(int32),intent(in) :: n call coo%poisson(n) buf = coo%to_crs() this%row_ptr = buf%row_ptr this%col_idx = buf%col_idx this%val = buf%val end subroutine ! ################################################### ! ################################################### function tensor_exponential_crs(this, itr_tol, tol, x, dt, fix_idx, fix_val) result(expA_v) class(CRS_), intent(in) :: this real(real64), intent(in) :: x(:) real(real64), optional, intent(in) :: dt real(real64), allocatable :: expA_v(:), increA_v(:), bhat(:) real(real64), intent(in) :: tol integer(int32), intent(in)::itr_tol ! real(real64),allocatable::increA(:,:) real(real64) :: increment, NN, t integer(int32) :: i, j, n real(real64), optional, intent(in) :: fix_val(:) integer(int32), optional, intent(in)::fix_idx(:) type(CRS_) :: Amatrix ! if(.not. allocated(expA) )allocate(expA(size(A,1),size(A,2) )) ! allocate(increA(size(A,1),size(A,2) )) ! if(size(A,1)/=size(A,2)) stop "ERROR:tensor exp is not a square matrix" ! expA_v = x ! do n=1,size(expA,1) ! expA(n,n)=1.0d0 ! enddo t = input(default=1.0d0, option=dt) ! exp(At) = I + At + 1/2*(At)^2 + 1/6*(At)^3 + .... ! exp(At)v = Iv + (At)v + 1/2*(At)^2 v + 1/6*(At)^3 v+ .... ! exp(At)v = v + (At)v + 1/2*(At)^2 v + 1/6*(At)^3 v+ .... ! expA_v = x ! do i=1,itr_tol ! increA_v = x ! do j=1,i ! increA_v = t/dble(j)*this%matmul(increA_v) ! enddo ! expA_v = expA_v + increA_v ! if(dot_product(increA_v,increA_v) < tol )exit ! ! enddo ! return ! i==1,2 ! 0-th order term if (present(fix_idx)) then Amatrix = this bhat = zeros(this%size()) call Amatrix%fix(idx=fix_idx, RHS=bhat, val=fix_val) increA_v = x expA_v = zeros(size(x)) expA_v = expA_v + increA_v ! 1-st order term increA_v = Amatrix%matmul(t*increA_v) - bhat*dt expA_v = expA_v + increA_v do i = 2, itr_tol increA_v = 1.0d0/(i)*Amatrix%matmul(t*increA_v) !- bhat*dt expA_v = expA_v + increA_v if (dot_product(increA_v, increA_v) < tol) exit end do else increA_v = x expA_v = zeros(size(x)) expA_v = expA_v + increA_v ! 1-st order term increA_v = this%matmul(t*increA_v) expA_v = expA_v + increA_v do i = 2, itr_tol increA_v = 1.0d0/(i)*this%matmul(t*increA_v) expA_v = expA_v + increA_v if (dot_product(increA_v, increA_v) < tol) exit end do end if end function ! ################################################### function tensor_exp_sqrt_crs(this, v, tol, coeff, itrmax, binomial) result(exp_sqrtA_v) class(CRS_), intent(in) :: this real(real64), intent(in) :: v(:) real(real64), allocatable :: dv(:), exp_sqrtA_v(:) logical, optional, intent(in) :: binomial complex(real64), optional, intent(in) :: coeff integer(int32) :: i integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol complex(real64) :: coeffi if (present(coeff)) then coeffi = coeff else coeffi = 1.0d0 end if dv = v exp_sqrtA_v = zeros(size(v)) exp_sqrtA_v = exp_sqrtA_v + dv ! 1-st order term dv = coeffi*this%tensor_sqrt(v=dv, tol=tol, itrmax=itrmax, binomial=binomial) exp_sqrtA_v = exp_sqrtA_v + dv do i = 2, itrmax if (i == 1) then cycle end if dv = 1.0d0/(i)*coeffi*this%tensor_sqrt(v=dv, tol=tol, itrmax=itrmax, binomial=binomial) exp_sqrtA_v = exp_sqrtA_v + dv if (dot_product(dv, dv) < tol) exit end do end function ! ##################################################### ! ################################################### function tensor_sqrt_crs(this, v, tol, itrmax, binomial, r) result(sqrtA_v) class(CRS_), intent(in) :: this real(real64), intent(in) :: v(:) real(real64), allocatable :: dv(:), sqrtA_v(:), logA_v(:) integer(int32) :: k integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol real(real64) :: coeff, nCr logical, optional, intenT(in) :: binomial real(real64), optional, intent(in) :: r if (present(binomial)) then if (binomial) then ! 二項係数によるsqrt(A)の近似 if (present(r)) then coeff = 1.0d0/maxval(this%val*r) else coeff = 1.0d0/maxval(this%val*5) end if sqrtA_v = 0.0d0*v dv = v do k = 0, itrmax nCr = gamma(0.50d0 + 1.0d0)/gamma(k + 1.0d0)/gamma(0.50d0 - k + 1) sqrtA_v = sqrtA_v + nCr*dv dv = (-dv + coeff*this%matmul(dv)) if (norm(dv)/size(dv) < tol) exit end do sqrtA_v = sqrtA_v/sqrt(coeff) return end if end if dv = v sqrtA_v = zeros(size(v)) sqrtA_v = sqrtA_v + dv ! 1-st order term dv = 0.50d0*this%tensor_log(v=dv, tol=tol, itrmax=itrmax) sqrtA_v = sqrtA_v + dv do k = 2, itrmax if (k == 1) then cycle end if dv = 1.0d0/dble(k)*0.50d0*this%tensor_log(v=dv, tol=tol, itrmax=itrmax) sqrtA_v = sqrtA_v + dv if (dot_product(dv, dv) < tol) exit end do end function ! ##################################################### ! ################################################### function tensor_log_crs_modified_Sparse(this, v, tol, itrmax, r) result(sqrtA_v) class(CRS_), intent(in) :: this real(real64), intent(in) :: v(:) real(real64), allocatable :: dv(:), sqrtA_v(:) integer(int32) :: k, i integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol real(real64), optional, intent(in) :: r !real(real64) :: c real(real64), allocatable :: c(:), lambda_mat(:, :), X(:, :), lambda(:) real(real64) :: c_val type(CRS_) :: I_CRS ! call I_CRS%eyes(this%size() ) ! call LAPACK_EIG(A=this,B=I_CRS,x=x,lambda=lambda) ! lambda_mat = zeros(this%size(),this%size()) ! do i=1,this%size() ! lambda_mat(i,i) = log(lambda(i)) ! enddo ! ! ! shor algorithm ! sqrtA_v = matmul(matmul( x, matmul(lambda_mat,transpose(x)) ),v ) ! return !c = maxval(this%val)*1.0d0 !c = this%diag()*10.0d0 ! テイラー展開中心をdiagで決める. if (present(r)) then c_val = r else c_val = maxval(abs(this%val)) end if !c = ones(this%size() ) !c = 1.0d0 ! k = 0 ! Martin, J. F. P. On the exponential representation of solutions of linear differential equations. J. Differ. Equ. 4, 257–279 (1968). ! ln(this) = Ln(this) = ln(exp(C) X ) - C sqrtA_v = -(c_val)*v do k = 1, itrmax if (k == 1) then !dv = k/c_val*(-c_val*v + this%matmul(v) ) dv = 1.0d0/k*(exp(c_val)*this%matmul(v) - v) else dv = (-1.0d0)*dble(k)/dble(k + 1)*(exp(c_val)*this%matmul(dv) - dv) end if sqrtA_v = sqrtA_v + dv if (norm(abs(dv)) < tol) return end do end function ! ##################################################### ! ################################################### function tensor_log_crs(this, v, tol, itrmax, r) result(sqrtA_v) ! wikipedia's difinition ! it is accurate only when "this" is a diagonally dominant matrix class(CRS_), intent(in) :: this real(real64), intent(in) :: v(:) real(real64), allocatable :: dv(:), sqrtA_v(:) integer(int32) :: k integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol !real(real64) :: c real(real64) :: c real(real64), optional, intent(in) :: r !real(real64) :: c !c = maxval(this%diag()) !c = maxval(this%val)*1.0d0 c = maxval(abs(this%diag()))*2.0d0 ! テイラー展開中心をdiagで決める. !c = 1.0d0 ! k = 0 if (present(r)) then c = r end if sqrtA_v = log(c)*v do k = 1, itrmax if (k == 1) then dv = (1.0d0)/k*(v - 1.0d0/c*this%matmul(v)) else dv = (1.0d0)*dble(k)/dble(k + 1)*(dv - 1.0d0/c*this%matmul(dv)) end if sqrtA_v = sqrtA_v - dv if (norm(abs(dv)) < tol) return end do end function ! ##################################################### ! ##################################################### subroutine fixCRS(this, idx, val, RHS, only_row) class(CRS_), intent(inout) :: this integer(int32), intent(in) :: idx(:) real(real64), optional, intent(inout) :: RHS(:) real(real64), intent(in) :: val(:) integer(int64) :: i, j, k, id logical, optional, intent(in) :: only_row if (present(only_row)) then if (only_row) then do i = 1, size(idx) id = idx(i) do j = this%row_ptr(id), this%row_ptr(id + 1) - 1 if (this%col_idx(j) == id) then this%val(j) = 1.0d0 else this%val(j) = 0.0d0 end if end do end do return end if end if do i = 1, size(idx) id = idx(i) do j = 1, size(this%row_ptr) - 1 if (j == id) then do k = this%row_ptr(j), this%row_ptr(j + 1) - 1 if (this%col_idx(k) == id) then this%val(k) = 0.0d0 else this%val(k) = 0.0d0 end if end do else do k = this%row_ptr(j), this%row_ptr(j + 1) - 1 if (this%col_idx(k) == id) then RHS(j) = RHS(j) - this%val(k)*val(i) this%val(k) = 0.0d0 end if end do end if end do end do do i = 1, size(idx) id = idx(i) do j = 1, size(this%row_ptr) - 1 if (j == id) then do k = this%row_ptr(j), this%row_ptr(j + 1) - 1 if (this%col_idx(k) == id) then this%val(k) = 1.0d0 end if end do end if end do end do do i = 1, size(idx) RHS(id) = val(i) end do end subroutine ! ##################################################### ! ##################################################### subroutine fix_zeroCRS(this, idx) class(CRS_), intent(inout) :: this integer(int32), intent(in) :: idx(:) integer(int64) :: i, j, k, id do i = 1, size(idx) id = idx(i) do j = 1, size(this%row_ptr) - 1 if (j == id) then do k = this%row_ptr(j), this%row_ptr(j + 1) - 1 if (this%col_idx(k) == id) then this%val(k) = 0.0d0 else this%val(k) = 0.0d0 end if end do else do k = this%row_ptr(j), this%row_ptr(j + 1) - 1 if (this%col_idx(k) == id) then this%val(k) = 0.0d0 end if end do end if end do end do do i = 1, size(idx) id = idx(i) do j = 1, size(this%row_ptr) - 1 if (j == id) then do k = this%row_ptr(j), this%row_ptr(j + 1) - 1 if (this%col_idx(k) == id) then this%val(k) = 1.0d0 end if end do end if end do end do end subroutine ! ##################################################### ! >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> complex >>>>>>>>>>>>>>>>>>>>>>> ! ################################################### function tensor_exponential_complex64_crs(this, itr_tol, tol, x, dt, fix_idx, fix_val) result(expA_v) class(CRS_), intent(in) :: this complex(real64), intent(in) :: x(:) real(real64), optional, intent(in) :: dt complex(real64), allocatable :: expA_v(:), increA_v(:), bhat(:) real(real64), intent(in) :: tol integer(int32), intent(in)::itr_tol ! complex(real64),allocatable::increA(:,:) real(real64) :: increment, NN, t integer(int32) :: i, j, n complex(real64), optional, intent(in) :: fix_val(:) integer(int32), optional, intent(in)::fix_idx(:) type(CRS_) :: Amatrix ! if(.not. allocated(expA) )allocate(expA(size(A,1),size(A,2) )) ! allocate(increA(size(A,1),size(A,2) )) ! if(size(A,1)/=size(A,2)) stop "ERROR:tensor exp is not a square matrix" ! expA_v = x ! do n=1,size(expA,1) ! expA(n,n)=1.0d0 ! enddo t = input(default=1.0d0, option=dt) ! exp(At) = I + At + 1/2*(At)^2 + 1/6*(At)^3 + .... ! exp(At)v = Iv + (At)v + 1/2*(At)^2 v + 1/6*(At)^3 v+ .... ! exp(At)v = v + (At)v + 1/2*(At)^2 v + 1/6*(At)^3 v+ .... ! expA_v = x ! do i=1,itr_tol ! increA_v = x ! do j=1,i ! increA_v = t/dble(j)*this%matmul(increA_v) ! enddo ! expA_v = expA_v + increA_v ! if(dot_product(increA_v,increA_v) < tol )exit ! ! enddo ! return ! i==1,2 ! 0-th order term if (present(fix_idx)) then Amatrix = this bhat = zeros(this%size()) call Amatrix%fix(idx=fix_idx, RHS=bhat, val=fix_val) increA_v = x expA_v = zeros(size(x)) expA_v = expA_v + increA_v ! 1-st order term increA_v = Amatrix%matmul(t*increA_v) - bhat*dt expA_v = expA_v + increA_v do i = 2, itr_tol if (i == 1) then cycle end if increA_v = 1.0d0/(i)*Amatrix%matmul(t*increA_v) !- bhat*dt expA_v = expA_v + increA_v if (abs(dot_product(increA_v, increA_v)) < tol) exit end do else increA_v = x expA_v = zeros(size(x)) expA_v = expA_v + increA_v ! 1-st order term increA_v = this%matmul(t*increA_v) expA_v = expA_v + increA_v do i = 2, itr_tol if (i == 1) then cycle end if increA_v = 1.0d0/(i)*this%matmul(t*increA_v) expA_v = expA_v + increA_v if (abs(dot_product(increA_v, increA_v)) < tol) exit end do end if end function ! ################################################### function tensor_exp_sqrt_complex64_crs(this, v, tol, itrmax, coeff, fix_idx, binomial, r) result(exp_sqrtA_v) class(CRS_), intent(in) :: this complex(real64), intent(in) :: v(:) complex(real64), optional, intent(in) :: coeff complex(real64), allocatable :: dv(:), exp_sqrtA_v(:), bhat(:), buf(:) integer(int32) :: i, k integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol logical, optional, intent(in) :: binomial real(real64), optional, intent(in) :: r !real(real64),optional,intent(in) :: fix_val(:) ! only 0-fix is available integer(int32), optional, intent(in)::fix_idx(:) complex(real64) :: coeffi type(CRS_) :: Amatrix type(Math_) :: math if (present(coeff)) then coeffi = coeff else coeffi = 1.0d0 end if if (present(fix_idx)) then !Amatrix = this !bhat = zeros(this%size() ) !call Amatrix%fix(idx=fix_idx,RHS=bhat,val=fix_val) !increA_v = x !expA_v = zeros(size(x) ) !expA_v = expA_v + increA_v ! 1-st order term !increA_v = Amatrix%matmul(t*increA_v) - bhat*dt !expA_v = expA_v + increA_v !do i=2,itr_tol ! if(i==1)then ! cycle ! endif ! increA_v = 1.0d0/(i)*Amatrix%matmul(t*increA_v) !- bhat*dt ! expA_v = expA_v + increA_v ! ! if(abs(dot_product(increA_v,increA_v)) < tol )exit !enddo Amatrix = this dv = zeros(size(v) - size(fix_idx)) k = 0 do i = 1, size(v) if (i.in.fix_idx) then cycle else k = k + 1 dv(k) = v(i) end if end do !dv(fix_idx(:) ) = fix_val(:)!/exp(1.0d0) !call Amatrix%fix(idx=fix_idx,RHS=bhat,val=fix_val+0.0d0*math%i) !call Amatrix%fix(idx=fix_idx,RHS=bhat,val=0.0d0*zeros(size(fix_idx) ) +0.0d0*math%i) call Amatrix%remove(idx=fix_idx) ! remove row and columns !call Amatrix%fix(idx=fix_idx,val=fix_val+0.0d0*math%i,only_row=.true.) !call Amatrix%fix(idx=fix_idx,RHS=dv,val=fix_val+0.0d0*math%i) !dv = dv + bhat !<test> !dv(fix_idx) = fix_val ! ignore row and columns exp_sqrtA_v = zeros(size(v) - size(fix_idx)) exp_sqrtA_v = exp_sqrtA_v + dv !exp_sqrtA_v(fix_idx(:) ) = fix_val(:) ! 1-st order term dv = coeffi*Amatrix%tensor_sqrt(v=dv, tol=tol, itrmax=itrmax, binomial=binomial, r=r) exp_sqrtA_v = exp_sqrtA_v + dv !exp_sqrtA_v(fix_idx) = fix_val do i = 2, itrmax dv = 1.0d0/(i)*coeffi*Amatrix%tensor_sqrt(v=dv, tol=tol, itrmax=itrmax, binomial=binomial, r=r) !dv(fix_idx) = 0.0d0 exp_sqrtA_v = exp_sqrtA_v + dv !exp_sqrtA_v(fix_idx) = fix_val if (abs(dot_product(dv, dv)) < tol) exit end do !exp_sqrtA_v(fix_idx(:) ) = fix_val(:) buf = exp_sqrtA_v exp_sqrtA_v = 0.0d0*v k = 0 do i = 1, size(v) if (i.in.fix_idx) then exp_sqrtA_v(i) = 0.0d0 else k = k + 1 exp_sqrtA_v(i) = buf(k) end if end do else dv = v exp_sqrtA_v = zeros(size(v)) exp_sqrtA_v = exp_sqrtA_v + dv ! 1-st order term dv = coeffi*this%tensor_sqrt(v=dv, tol=tol, itrmax=itrmax, binomial=binomial, r=r) exp_sqrtA_v = exp_sqrtA_v + dv do i = 2, itrmax dv = 1.0d0/(i)*coeffi*this%tensor_sqrt(v=dv, tol=tol, itrmax=itrmax, binomial=binomial, r=r) exp_sqrtA_v = exp_sqrtA_v + dv if (abs(dot_product(dv, dv)) < tol) exit end do end if end function ! ##################################################### ! ################################################### function tensor_sqrt_complex64_crs(this, v, tol, itrmax, binomial, r) result(sqrtA_v) class(CRS_), intent(in) :: this complex(real64), intent(in) :: v(:) complex(real64), allocatable :: dv(:), sqrtA_v(:), logA_v(:) integer(int32) :: k integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol real(real64) :: coeff, nCr logical, optional, intenT(in) :: binomial real(real64), optional, intent(in) :: r if (present(binomial)) then if (binomial) then ! 二項係数によるsqrt(A)の近似 if (present(r)) then coeff = 1.0d0/maxval(this%val*r) else coeff = 1.0d0/maxval(this%val*5) end if sqrtA_v = 0.0d0*v dv = v do k = 0, itrmax nCr = gamma(0.50d0 + 1.0d0)/gamma(k + 1.0d0)/gamma(0.50d0 - k + 1) sqrtA_v = sqrtA_v + nCr*dv dv = (-dv + coeff*this%matmul(dv)) if (norm(dble(dv))/size(dv) < tol) exit end do sqrtA_v = sqrtA_v/sqrt(coeff) return end if end if dv = v sqrtA_v = zeros(size(v)) sqrtA_v = sqrtA_v + dv ! 1-st order term dv = 0.50d0*this%tensor_log(v=dv, tol=tol, itrmax=itrmax) sqrtA_v = sqrtA_v + dv do k = 2, itrmax if (k == 1) then cycle end if dv = 1.0d0/dble(k)*0.50d0*this%tensor_log(v=dv, tol=tol, itrmax=itrmax) sqrtA_v = sqrtA_v + dv if (abs(dot_product(dv, dv)) < tol) exit end do end function ! ##################################################### ! ################################################### function tensor_log_complex64_crs(this, v, tol, itrmax) result(sqrtA_v) class(CRS_), intent(in) :: this complex(real64), intent(in) :: v(:) complex(real64), allocatable :: dv(:), sqrtA_v(:) integer(int32) :: k integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol !complex(real64) :: c real(real64), allocatable :: c(:) !c = maxval(this%val)*1. c = this%diag() ! テイラー展開中心をdiagで決める. !c = maxval(this%diag())*3.0d0*ones(size(v)) ! テイラー展開中心をdiagで決める. !c = maxval(this%diag(cell_centered=.true.))*ones(size(v)) ! テイラー展開中心をdiagで決める. !c = 1.0d0 ! k = 0 sqrtA_v = zeros(size(v)) sqrtA_v = log(c)*v do k = 1, itrmax if (k == 1) then dv = k/c*(-c*v + this%matmul(v)) else dv = (-1.0d0)/c*dble(k)/dble(k + 1)*(-c*dv + this%matmul(dv)) end if sqrtA_v = sqrtA_v + dv if (norm(abs(dv)) < tol) return end do end function ! ##################################################### ! ################################################### function tensor_sinc_sqrt_complex64_crs(this, v, tol, itrmax, coeff, debug, fix_idx) result(retA_v) class(CRS_), intent(in) :: this complex(real64), intent(in) :: v(:) complex(real64), optional, intent(in) :: coeff complex(real64), allocatable :: dv(:), retA_v(:) integer(int32), optional, intent(in) :: fix_idx(:) integer(int32) :: k integer(int32), intent(in) :: itrmax logical, optional, intent(in) :: debug real(real64), intent(in) :: tol type(Math_) :: math complex(real64) :: a, b if (present(coeff)) then a = coeff else a = 1.0d0 end if ! sinc(sqrt(A) ) = \sum_{n=0}^{\infty} !c = 1.0d0 ! k = 0 !k=0 k = 0 retA_v = v ! zero-fixed Dirichlet boundary if (present(fix_idx)) then retA_v(fix_idx) = 0 end if ! k=1 k = 1 dv = a*a*this%matmul(v) ! zero-fixed Dirichlet boundary if (present(fix_idx)) then dv(fix_idx) = 0 end if retA_v = retA_v + ((-1.0d0)**k)*(sqrt(math%pi)*2.0d0**(-1 - 2*k)) & /gamma(1.0d0 + k)/gamma(1.5d0 + k)*dv do k = 2, itrmax dv = a*a*this%matmul(dv) ! zero-fixed Dirichlet boundary if (present(fix_idx)) then dv(fix_idx) = 0 end if b = ((-1.0d0)**k)*(sqrt(math%pi)*2.0d0**(-1 - 2*k)) & /gamma(1.0d0 + k)/gamma(1.5d0 + k) retA_v = retA_v + b*dv if (norm(abs(b*dv)) < tol) return if (present(debug)) then if (debug) then print *, k, norm(abs(b*dv)) end if end if end do end function ! ##################################################### ! ################################################### function tensor_sinc_sqrt_real64_crs(this, v, tol, itrmax, coeff, debug, fix_idx) result(retA_v) class(CRS_), intent(in) :: this real(real64), intent(in) :: v(:) real(real64), optional, intent(in) :: coeff real(real64), allocatable :: dv(:), retA_v(:) integer(int32), optional, intent(in) :: fix_idx(:) integer(int32) :: k integer(int32), intent(in) :: itrmax logical, optional, intent(in) :: debug real(real64), intent(in) :: tol type(Math_) :: math real(real64) :: a, b if (present(coeff)) then a = coeff else a = 1.0d0 end if ! sinc(sqrt(A) ) = \sum_{n=0}^{\infty} !c = 1.0d0 ! k = 0 !k=0 k = 0 retA_v = v ! zero-fixed Dirichlet boundary if (present(fix_idx)) then retA_v(fix_idx) = 0 end if ! k=1 k = 1 dv = a*a*this%matmul(v) ! zero-fixed Dirichlet boundary if (present(fix_idx)) then dv(fix_idx) = 0 end if retA_v = retA_v + ((-1.0d0)**k)*(sqrt(math%pi)*2.0d0**(-1 - 2*k)) & /gamma(1.0d0 + k)/gamma(1.5d0 + k)*dv do k = 2, itrmax dv = a*a*this%matmul(dv) ! zero-fixed Dirichlet boundary if (present(fix_idx)) then dv(fix_idx) = 0 end if b = ((-1.0d0)**k)*(sqrt(math%pi)*2.0d0**(-1 - 2*k)) & /gamma(1.0d0 + k)/gamma(1.5d0 + k) retA_v = retA_v + b*dv if (norm(abs(b*dv)) < tol) return if (present(debug)) then if (debug) then print *, k, norm(abs(b*dv)) end if end if end do end function ! ##################################################### function tensor_wave_kernel_complex_64_crs(this, u0, v0, tol, itrmax, h, t, fix_idx, debug) result(u) class(CRS_), intent(in) :: this complex(real64), intent(in) :: u0(:), v0(:) complex(real64), allocatable:: Adu(:), Adv(:), u(:) complex(real64) :: cos_coeff, sinc_coeff real(real64), intent(in) :: h, t logical, optional, intent(in) :: debug integer(int32), optional, intent(in) :: itrmax real(real64), optional, intent(in) :: tol integer(int32), optional, intent(in) :: fix_idx(:) integer(int32) :: itr_max = 100 real(real64) :: itr_tol = dble(1.0e-16) logical :: debug_mode type(Math_) :: math integer(int32) :: n if (present(itrmax)) then itr_max = itrmax end if if (present(tol)) then itr_tol = tol end if ! a + 2 h M^{-1} v + M^{-1} K u = 0 ! u(t) = exp(-ht)( cos(t*sqrt(M^{-1} K - h^2 I) ) u ! + t*sinc( t*sqrt(M^{-1} K - h^2 I) ) v u = exp(-h*t)*this%tensor_cos_sqrt(v=u0, tol=itr_tol, itrmax=itr_max, & coeff=t + 0*math%i, debug=debug, fix_idx=fix_idx) & + exp(-h*t)*t*this%tensor_sinc_sqrt(v=v0, tol=itr_tol, itrmax=itr_max, & coeff=t + 0*math%i, debug=debug, fix_idx=fix_idx) end function function tensor_wave_kernel_real_64_crs(this, u0, v0, tol, itrmax, h, t, fix_idx, debug) result(u) class(CRS_), intent(inout) :: this real(real64), intent(in) :: u0(:), v0(:) real(real64), intent(in) :: h, t logical, optional, intent(in) :: debug integer(int32), optional, intent(in) :: itrmax real(real64), optional, intent(in) :: tol integer(int32), optional, intent(in) :: fix_idx(:) real(real64), allocatable:: Adu(:), Adv(:), u(:) real(real64) :: cos_coeff, sinc_coeff integer(int32) :: itr_max = 100 real(real64) :: itr_tol = dble(1.0e-16) logical :: debug_mode type(Math_) :: math integer(int32) :: n if (present(itrmax)) then itr_max = itrmax end if if (present(tol)) then itr_tol = tol end if if (present(fix_idx)) then call this%fix(fix_idx) end if ! a + 2 h M^{-1} v + M^{-1} K u = 0 ! u(t) = exp(-ht)( cos(t*sqrt(M^{-1} K - h^2 I) ) u ! + t*sinc( t*sqrt(M^{-1} K - h^2 I) ) v u = exp(-h*t)*this%tensor_cos_sqrt(v=u0, tol=itr_tol, itrmax=itr_max, & coeff=t, debug=debug, fix_idx=fix_idx) & + exp(-h*t)*t*this%tensor_sinc_sqrt(v=v0, tol=itr_tol, itrmax=itr_max, & coeff=t, debug=debug, fix_idx=fix_idx) end function ! ################################################### function tensor_wave_kernel_LPF_real_64_crs(this, u0, v0, tol, itrmax, h, t, fix_idx, debug, cutoff_frequency) result(u) class(CRS_), intent(in) :: this real(real64), intent(in) :: u0(:), v0(:) real(real64), allocatable:: Adu(:), Adv(:), u(:) real(real64) :: cos_coeff, sinc_coeff real(real64), intent(in) :: h, t, cutoff_frequency logical, optional, intent(in) :: debug integer(int32), optional, intent(in) :: itrmax real(real64), optional, intent(in) :: tol integer(int32), optional, intent(in) :: fix_idx(:) integer(int32) :: itr_max = 100 real(real64) :: itr_tol = dble(1.0e-16) logical :: debug_mode type(Math_) :: math integer(int32) :: n if (present(itrmax)) then itr_max = itrmax end if if (present(tol)) then itr_tol = tol end if ! a + 2 h M^{-1} v + M^{-1} K u = 0 ! u(t) = exp(-ht)( cos(t*sqrt(M^{-1} K - h^2 I) ) u ! + t*sinc( t*sqrt(M^{-1} K - h^2 I) ) v u = exp(-h*t)*this%tensor_cos_sqrt_LPF(v=u0, tol=itr_tol, itrmax=itr_max, & coeff=t, debug=debug, fix_idx=fix_idx, cutoff_frequency=cutoff_frequency) & + exp(-h*t)*this%tensor_t_sinc_sqrt_LPF(v=v0, tol=itr_tol, itrmax=itr_max, & coeff=t, debug=debug, fix_idx=fix_idx, cutoff_frequency=cutoff_frequency) end function ! ################################################### function tensor_wave_kernel_RHS_real_64_crs(this, RHS, t, tol, itrmax, fix_idx, debug) result(u) class(CRS_), intent(in) :: this real(real64), intent(in) :: RHS(:) real(real64), allocatable:: u(:), du(:) real(real64) :: cos_coeff, sinc_coeff real(real64), intent(in) :: t logical, optional, intent(in) :: debug integer(int32), optional, intent(in) :: itrmax real(real64), optional, intent(in) :: tol integer(int32), optional, intent(in) :: fix_idx(:) integer(int32) :: itr_max = 100 real(real64) :: itr_tol = dble(1.0e-16) logical :: debug_mode type(Math_) :: math integer(int32) :: n if (present(itrmax)) then itr_max = itrmax end if if (present(tol)) then itr_tol = tol end if ! Force-induced displacement ! RHS is constant for interval [0, t] !u = -1.0d0/2.0d0*t*t*RHS u = 0.0d0*RHS du = -t*t/2.0d0*RHS u = u + du do n = 1, itr_max du = -t*t/dble(2*n + 1)/dble(2*n + 2)*this%matmul(du) u = u + du if (present(debug)) then if (debug) then print *, n, norm(du) end if end if if (norm(du) < itr_tol) exit end do end function ! ################################################### function tensor_wave_kernel_RHS_LPF_real_64_crs(this, RHS, t, tol, itrmax, fix_idx, debug, cutoff_frequency, weights) result(u) class(CRS_), intent(in) :: this real(real64), intent(in) :: RHS(:), cutoff_frequency real(real64), allocatable:: u(:), du(:) real(real64) :: cos_coeff, sinc_coeff real(real64), intent(in) :: t logical, optional, intent(in) :: debug integer(int32), optional, intent(in) :: itrmax real(real64), optional, intent(in) :: tol integer(int32), optional, intent(in) :: fix_idx(:) real(real64), optional, intent(in) :: weights(:) integer(int32) :: itr_max = 100 real(real64) :: itr_tol = dble(1.0e-16) real(real64) :: ddt, hat_t logical :: debug_mode type(Math_) :: math integer(int32) :: n, m real(real64), allocatable :: w(:) integer(int32) :: lbd, ubd if (present(weights)) then ubd = size(weights) - (size(weights) + 1)/2 lbd = -(size(weights) - (size(weights) + 1)/2) allocate (w(lbd:ubd)) w(lbd:ubd) = weights(1:) else allocate (w(-1:1)) w(-1) = 0.250d0 w(0) = 0.500d0 w(1) = 0.250d0 end if if (present(itrmax)) then itr_max = itrmax end if if (present(tol)) then itr_tol = tol end if ! Force-induced displacement ! RHS is constant for interval [0, t] !ddt = 1.0d0/4.0d0/cutoff_frequency ddt = 1.0d0/cutoff_frequency/2.0d0/math%pi*acos(sqrt(2.0d0) - 1.0d0) !u = -1.0d0/2.0d0*t*t*RHS u = 0.0d0*RHS hat_t = 0.0d0 !print *,"dbg",lbound(w, 1), ubound(w, 1) do m = lbound(w, 1), ubound(w, 1) hat_t = hat_t + w(m)*(t + m*ddt)**2 end do !hat_t = 0.250d0*(t-ddt)**2 & ! + 0.50d0*(t)**2 & ! + 0.250d0*(t+ddt)**2 du = -1.0d0/2.0d0*RHS u = u + hat_t*du do n = 1, itr_max hat_t = 0.0d0 do m = lbound(w, 1), ubound(w, 1) hat_t = hat_t + w(m)*(t + m*ddt)**(2*n + 2) end do !hat_t = 0.250d0*(t-ddt)**(2*n+2) & ! + 0.50d0*(t)**(2*n+2)& ! + 0.250d0*(t+ddt)**(2*n+2) du = -1.0d0/dble(2*n + 1)/dble(2*n + 2)*this%matmul(du) u = u + hat_t*du if (present(debug)) then if (debug) then print *, n, norm(hat_t*du) end if end if if (norm(hat_t*du) < itr_tol) exit end do end function ! ################################################### function tensor_wave_kernel_RHS_complex_64_crs(this, RHS, t, tol, itrmax, fix_idx, debug) result(u) class(CRS_), intent(in) :: this complex(real64), intent(in) :: RHS(:) complex(real64), allocatable:: u(:), u_n(:), du(:) real(real64), intent(in) :: t logical, optional, intent(in) :: debug integer(int32), optional, intent(in) :: itrmax real(real64), optional, intent(in) :: tol integer(int32), optional, intent(in) :: fix_idx(:) integer(int32) :: itr_max = 100 real(real64) :: itr_tol = dble(1.0e-16) logical :: debug_mode type(Math_) :: math integer(int32) :: n if (present(itrmax)) then itr_max = itrmax end if if (present(tol)) then itr_tol = tol end if ! Force-induced displacement ! RHS is constant for interval [0, t] u = 0.0d0*RHS du = -t*t/2.0d0*RHS u = u + du do n = 1, itr_max du = -t*t/dble(2*n + 1)/dble(2*n + 2)*this%matmul(du) u = u + du if (present(debug)) then if (debug) then print *, n, sqrt(dble(dot_product(du, du))) end if end if if (sqrt(dble(dot_product(du, du))) < itr_tol) exit end do end function ! ################################################### ! ################################################### function tensor_d1_wave_kernel_complex64_crs(this, u, v, tol, itrmax, coeff) result(retA_v) class(CRS_), intent(in) :: this complex(real64), intent(in) :: u(:), v(:) complex(real64), optional, intent(in) :: coeff complex(real64), allocatable :: dv(:), retA_v(:), du(:) integer(int32) :: k integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol type(Math_) :: math complex(real64) :: a ! first derivative of wave kernel function ! v - t u w^2 - 1/2 t^2 (v w^2) + 1/6 t^3 u w^4 + 1/24 t^4 v w^4 - 1/120 t^5 (u w^6) + O(t^6) !(テイラー級数) ! sinc(sqrt(A) ) = \sum_{n=0}^{\infty} !c = 1.0d0 ! k = 0 if (present(coeff)) then a = coeff else a = 1.0d0 end if !1,2 du = this%matmul(u) retA_v = v - a*du !3 dv = this%matmul(v) retA_v = retA_v - 1.0d0/2.0d0*a*a*dv !4 du = this%matmul(du) retA_v = retA_v + 1.0d0/6.0d0*a*a*a*du !5 dv = this%matmul(dv) retA_v = retA_v + 1.0d0/24.0d0*a*a*a*a*dv !6 du = this%matmul(du) retA_v = retA_v - 1.0d0/120.0d0*a*a*a*a*a*du ! 6th-order apploximation end function ! ##################################################### ! ################################################### function tensor_cos_sqrt_complex64_crs(this, v, tol, itrmax, coeff, debug, fix_idx) result(retA_v) class(CRS_), intent(in) :: this complex(real64), intent(in) :: v(:) complex(real64), allocatable :: dv(:), retA_v(:) complex(real64), optional, intent(in) :: coeff integer(int32), optional, intent(in) :: fix_idx(:) logical, optional, intent(in) :: debug integer(int32) :: k, row, CRS_id integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol type(Math_) :: math complex(real64) :: a if (present(coeff)) then a = coeff else a = 1.0d0 end if ! sinc(sqrt(A) ) = \sum_{n=0}^{\infty} !c = 1.0d0 ! k = 0 !k=0 k = 0 retA_v = v ! zero-fixed Dirichlet boundary if (present(fix_idx)) then retA_v(fix_idx) = 0 end if ! a=t ! k=1 k = 1 dv = this%matmul(v) dv = -0.50d0*a*a*dv ! zero-fixed Dirichlet boundary if (present(fix_idx)) then dv(fix_idx) = 0 end if retA_v = retA_v + dv ! do k = 2, itrmax ! k = k + 1 dv = (a*a)*this%matmul(dv)/(2*k)/(2*k - 1)*(-1.0d0) ! zero-fixed Dirichlet boundary if (present(fix_idx)) then dv(fix_idx) = 0 end if retA_v = retA_v + dv if (norm(abs(dv)) < tol) return if (present(debug)) then if (debug) then print *, k, norm(abs(dv)) end if end if end do end function ! ##################################################### ! ##################################################### function wave_kernel_hanning_coefficient(dt, num_sample, t_power) result(ret) real(real64), intent(in)::dt integer, intent(in) :: num_sample, t_power integer(int32) :: m, k real(real64) :: ret, t, period_T type(Math_) :: math k = num_sample/2 ret = 0.0d0 period_T = 1.0d0/dt*2 do m = -k, k t = -num_sample*dt + (m - 1)*dt ret = ret + (0.50d0 - 0.50d0*cos(2.0d0*math%pi*(t)/period_T)) & *(m*dt)**(t_power) end do end function ! ##################################################### ! ################################################### function tensor_cos_sqrt_real64_crs(this, v, tol, itrmax, coeff, debug, fix_idx) result(retA_v) class(CRS_), intent(in) :: this real(real64), intent(in) :: v(:) real(real64), allocatable :: dv(:), retA_v(:) real(real64), optional, intent(in) :: coeff integer(int32), optional, intent(in) :: fix_idx(:) logical, optional, intent(in) :: debug integer(int32) :: k, row, CRS_id integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol type(Math_) :: math real(real64) :: a if (present(coeff)) then a = coeff else a = 1.0d0 end if ! sinc(sqrt(A) ) = \sum_{n=0}^{\infty} !c = 1.0d0 ! k = 0 !k=0 k = 0 retA_v = v ! zero-fixed Dirichlet boundary if (present(fix_idx)) then retA_v(fix_idx) = 0 end if if (present(debug)) then if (debug) then print *, k, norm(abs(v)) end if end if ! k=1 k = 1 dv = this%matmul(v) dv = -0.50d0*a*a*dv ! zero-fixed Dirichlet boundary if (present(fix_idx)) then dv(fix_idx) = 0 end if retA_v = retA_v + dv if (present(debug)) then if (debug) then print *, k, norm(abs(dv)) end if end if do k = 2, itrmax ! k = k + 1 dv = a*a*this%matmul(dv)/(2*k)/(2*k - 1)*(-1.0d0) ! zero-fixed Dirichlet boundary if (present(fix_idx)) then dv(fix_idx) = 0 end if retA_v = retA_v + dv if (norm(abs(dv)) < tol) return if (present(debug)) then if (debug) then print *, k, norm(abs(dv)) end if end if end do end function ! ##################################################### ! ################################################### function tensor_cos_sqrt_LPF_real64_crs(this, v, tol, itrmax, coeff, debug, fix_idx, cutoff_frequency) result(retA_v) class(CRS_), intent(in) :: this real(real64), intent(in) :: v(:) real(real64), allocatable :: dv(:), retA_v(:) real(real64), optional, intent(in) :: coeff integer(int32), optional, intent(in) :: fix_idx(:) logical, optional, intent(in) :: debug integer(int32) :: k, row, CRS_id, num_hanning_sample integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol, cutoff_frequency type(Math_) :: math real(real64) :: a, dt, b, ddt !ddt = 1.0d0/(cutoff_frequency/4.0d0) !ddt = 1.0d0/cutoff_frequency/math%pi*acos( (sqrt(2.0d0)-1.0d0)/2.0d0 ) ddt = 1.0d0/cutoff_frequency/2.0d0/math%pi*acos(sqrt(2.0d0) - 1.0d0) if (present(coeff)) then a = coeff else a = 1.0d0 end if ! sinc(sqrt(A) ) = \sum_{n=0}^{\infty} !c = 1.0d0 ! k = 0 dt = a dv = v !k=0 k = 0 retA_v = v ! zero-fixed Dirichlet boundary if (present(fix_idx)) then retA_v(fix_idx) = 0 end if do k = 1, itrmax ! k = k + 1 !dv = a*a*this%matmul(dv)/(2*k)/(2*k-1)*(-1.0d0) dv = this%matmul(dv) ! zero-fixed Dirichlet boundary if (present(fix_idx)) then dv(fix_idx) = 0 end if a = ((-1.0d0)**k)/gamma(2.0d0*k + 1.0d0) b = 0.250d0*((dt - ddt)**(2*k)) + 0.50d0*((dt)**(2*k)) + 0.250d0*((dt + ddt)**(2*k)) retA_v = retA_v + a*b*dv if (norm(abs(a*b*dv)) < tol) return if (present(debug)) then if (debug) then print *, k, norm(abs(a*b*dv)) end if end if end do end function ! ##################################################### ! ################################################### function tensor_t_sinc_sqrt_LPF_real64_crs(this, v, tol, itrmax, coeff, debug, fix_idx, cutoff_frequency) & result(retA_v) class(CRS_), intent(in) :: this real(real64), intent(in) :: v(:) real(real64), optional, intent(in) :: coeff real(real64), allocatable :: dv(:), retA_v(:) integer(int32), optional, intent(in) :: fix_idx(:) integer(int32) :: k, num_hanning_sample integer(int32), intent(in) :: itrmax logical, optional, intent(in) :: debug real(real64), intent(in) :: tol, cutoff_frequency type(Math_) :: math real(real64) :: a, b, dt, ddt !num_hanning_sample = input(default=16,option=window_size) !ddt = 1.0d0/(cutoff_frequency/4.0d0) !ddt = 1.0d0/cutoff_frequency/math%pi*acos( (sqrt(2.0d0)-1.0d0)/2.0d0 ) ddt = 1.0d0/cutoff_frequency/2.0d0/math%pi*acos(sqrt(2.0d0) - 1.0d0) if (present(coeff)) then a = coeff else a = 1.0d0 end if ! sinc(sqrt(A) ) = \sum_{n=0}^{\infty} !c = 1.0d0 ! k = 0 dt = a !k=0 k = 0 retA_v = v dv = v if (present(fix_idx)) then dv(fix_idx) = 0 end if retA_v = retA_v + dt*dv do k = 1, itrmax a = ((-1.0d0)**k)*(sqrt(math%pi)*2.0d0**(-1 - 2*k)) & /gamma(1.0d0 + k)/gamma(1.5d0 + k) b = 0.250d0*((dt - ddt)**(2*k + 1)) + 0.50d0*((dt)**(2*k + 1)) & + 0.250d0*((dt + ddt)**(2*k + 1)) dv = this%matmul(dv) ! zero-fixed Dirichlet boundary if (present(fix_idx)) then dv(fix_idx) = 0 end if retA_v = retA_v + a*b*dv if (norm(abs(a*b*dv)) < tol) return if (present(debug)) then if (debug) then print *, k, norm(abs(a*b*dv)) end if end if end do end function ! ##################################################### ! ##################################################### subroutine fix_complex64_CRS(this, idx, val, RHS, only_row) class(CRS_), intent(inout) :: this integer(int32), intent(in) :: idx(:) complex(real64), optional, intent(inout) :: RHS(:) complex(real64), intent(in) :: val(:) integer(int64) :: i, j, k, id logical, optional, intent(in) :: only_row if (present(only_row)) then if (only_row) then do i = 1, size(idx) id = idx(i) do j = this%row_ptr(id), this%row_ptr(id + 1) - 1 if (this%col_idx(j) == id) then this%val(j) = 1.0d0 else this%val(j) = 0.0d0 end if end do end do return end if end if do i = 1, size(idx) id = idx(i) do j = 1, size(this%row_ptr) - 1 if (j == id) then do k = this%row_ptr(j), this%row_ptr(j + 1) - 1 if (this%col_idx(k) == id) then this%val(k) = 0.0d0 else this%val(k) = 0.0d0 end if end do else do k = this%row_ptr(j), this%row_ptr(j + 1) - 1 if (this%col_idx(k) == id) then RHS(j) = RHS(j) - this%val(k)*val(i) this%val(k) = 0.0d0 end if end do end if end do end do do i = 1, size(idx) id = idx(i) do j = 1, size(this%row_ptr) - 1 if (j == id) then do k = this%row_ptr(j), this%row_ptr(j + 1) - 1 if (this%col_idx(k) == id) then this%val(k) = 1.0d0 end if end do end if end do end do do i = 1, size(idx) RHS(id) = val(i) end do end subroutine ! ##################################################### subroutine removeCRS(this, idx) class(CRS_), intent(inout) :: this integer(int32), optional, intent(in) :: idx(:) integer(int64), allocatable :: num_col(:), col_idx(:), only_k(:), & copy_idx(:), row_ptr(:) real(real64), allocatable :: val(:) integer(int64) :: row, col_id, count_id, n, k, i, j if (.not. present(idx)) then if (allocated(this%col_idx)) then deallocate (this%col_idx) end if if (allocated(this%row_ptr)) then deallocate (this%row_ptr) end if if (allocated(this%val)) then deallocate (this%val) end if return end if copy_idx = idx ! remove row and column listed in idx(:) do i = 1, size(idx) k = maxval(copy_idx) num_col = int(zeros(this%size())) ! zerofill col_idx if row==k or col==k do row = 1, size(this%row_ptr) - 1 num_col(row) = this%row_ptr(row + 1) - this%row_ptr(row) do col_id = this%row_ptr(row), this%row_ptr(row + 1) - 1 if (row == k .or. this%col_idx(col_id) == k) then this%col_idx(col_id) = 0 num_col(row) = num_col(row) - 1 end if end do end do !call print(num_col) ! create new row_ptr row_ptr = int(zeros(this%size() - 1 + 1)) row_ptr(1) = 1 n = 0 do row = 1, size(this%row_ptr) - 1 if (row == k) then cycle else n = n + 1 row_ptr(n + 1) = row_ptr(n) + num_col(row) end if end do ! remove zero col_idx = int(zeros(sum(num_col))) val = zeros(sum(num_col)) n = 0 do row = 1, k - 1 do col_id = this%row_ptr(row), this%row_ptr(row + 1) - 1 if (this%col_idx(col_id) /= 0) then n = n + 1 col_idx(n) = this%col_idx(col_id) val(n) = this%val(col_id) end if end do end do do row = k + 1, size(this%row_ptr) - 1 do col_id = this%row_ptr(row), this%row_ptr(row + 1) - 1 if (this%col_idx(col_id) /= 0) then n = n + 1 col_idx(n) = this%col_idx(col_id) val(n) = this%val(col_id) end if end do end do this%col_idx = col_idx do j = 1, size(this%col_idx) if (this%col_idx(j) >= k) then this%col_idx(j) = this%col_idx(j) - 1 end if end do this%val = val this%row_ptr = row_ptr if (size(copy_idx) == 1) then exit else copy_idx = removeif(copy_idx, equal_to=k) cycle end if end do end subroutine function sum_sum(a, b, a_params, b_params, i_plus_j, ir, jr) result(ret) interface ! coefficient #1 function a(k, params) result(ret) use iso_fortran_env integer(int32), intent(in) :: k complex(real64), intent(in) :: params(:) complex(real64) :: ret end function end interface interface ! coefficient #2 function b(k, params) result(ret) use iso_fortran_env integer(int32), intent(in) :: k complex(real64), intent(in) :: params(:) complex(real64) :: ret end function end interface integer(int32), intent(in) :: i_plus_j ! i+j complex(real64), intent(in) :: a_params(:), b_params(:) integer(int32), intent(in) :: ir(1:2) ! range of i integer(int32), intent(in) :: jr(1:2) ! range of j integer(int32) :: i, j complex(real64) :: ret ret = 0.0d0 do i = ir(1), i_plus_j j = i_plus_j - i if (j < jr(1) .or. jr(2) < j) cycle ret = ret + a(i, a_params)*b(j, b_params) end do end function ! ################################################### function tensor_cos_sqrt_cos_sqrt_complex64_crs(this, v, tol, itrmax, coeff_1, coeff_2) result(retA_v) class(CRS_), intent(in) :: this complex(real64), intent(in) :: v(:) complex(real64), allocatable :: dv(:), retA_v(:) complex(real64), intent(in) :: coeff_1, coeff_2 integer(int32) :: k integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol type(Math_) :: math complex(real64) :: coeff do k = 0, itrmax ! k = k + 1 coeff = sum_sum( & a=cos_sqrt_Taylor, & b=cos_sqrt_Taylor, & a_params=[coeff_1], & b_params=[coeff_2], & i_plus_j=k, & ir=[0, itrmax], & jr=[0, itrmax] & ) if (k == 0) then dv = v retA_v = coeff*dv else dv = this%matmul(dv) retA_v = retA_v + coeff*dv end if if (norm(abs(dv)) < tol) return end do end function ! ##################################################### ! ################################################### function tensor_sinc_sqrt_cos_sqrt_complex64_crs(this, v, tol, itrmax, coeff_1, coeff_2, debug) result(retA_v) class(CRS_), intent(in) :: this complex(real64), intent(in) :: v(:) complex(real64), allocatable :: dv(:), retA_v(:) complex(real64), intent(in) :: coeff_1, coeff_2 logical, optional, intent(in) :: debug integer(int32) :: k integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol type(Math_) :: math complex(real64) :: coeff do k = 0, itrmax ! k = k + 1 coeff = sum_sum( & a=sinc_sqrt_Taylor, & b=cos_sqrt_Taylor, & a_params=[coeff_1], & b_params=[coeff_2], & i_plus_j=k, & ir=[0, itrmax], & jr=[0, itrmax] & ) if (k == 0) then dv = v retA_v = coeff*dv else dv = this%matmul(dv) retA_v = retA_v + coeff*dv end if if (norm(abs(dv)) < tol) return if (present(debug)) then if (debug) then print *, k, norm(abs(dv)) end if end if end do end function ! ##################################################### ! ################################################### function tensor_cos_sqrt_sinc_sqrt_complex64_crs(this, v, tol, itrmax, coeff_1, coeff_2) result(retA_v) class(CRS_), intent(in) :: this complex(real64), intent(in) :: v(:) complex(real64), allocatable :: dv(:), retA_v(:) complex(real64), intent(in) :: coeff_1, coeff_2 integer(int32) :: k integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol type(Math_) :: math complex(real64) :: coeff do k = 0, itrmax ! k = k + 1 coeff = sum_sum( & a=cos_sqrt_Taylor, & b=sinc_sqrt_Taylor, & a_params=[coeff_1], & b_params=[coeff_2], & i_plus_j=k, & ir=[0, itrmax], & jr=[0, itrmax] & ) if (k == 0) then dv = v retA_v = coeff*dv else dv = this%matmul(dv) retA_v = retA_v + coeff*dv end if if (norm(abs(dv)) < tol) return end do end function ! ##################################################### ! ################################################### function tensor_sinc_sqrt_sinc_sqrt_complex64_crs(this, v, tol, itrmax, coeff_1, coeff_2) result(retA_v) class(CRS_), intent(in) :: this complex(real64), intent(in) :: v(:) complex(real64), allocatable :: dv(:), retA_v(:) complex(real64), intent(in) :: coeff_1, coeff_2 integer(int32) :: k integer(int32), intent(in) :: itrmax real(real64), intent(in) :: tol type(Math_) :: math complex(real64) :: coeff do k = 0, itrmax ! k = k + 1 coeff = sum_sum( & a=sinc_sqrt_Taylor, & b=sinc_sqrt_Taylor, & a_params=[coeff_1], & b_params=[coeff_2], & i_plus_j=k, & ir=[0, itrmax], & jr=[0, itrmax] & ) if (k == 0) then dv = v retA_v = coeff*dv else dv = this%matmul(dv) retA_v = retA_v + coeff*dv end if if (norm(abs(dv)) < tol) return end do end function ! ##################################################### function cos_sqrt_Taylor(k, params) result(ret) integer(int32), intent(in) :: k complex(real64), intent(in) :: params(:) complex(real64) :: ret ! Taylor expanision coefficient for cos(a*sqrt(x) ) around x=0 ret = ((-1.0d0)**k)*(params(1)**(2*k))/gamma(2.0d0*k + 1.0d0) end function ! ##################################################### function sinc_sqrt_Taylor(k, params) result(ret) integer(int32), intent(in) :: k complex(real64), intent(in) :: params(:) complex(real64) :: ret type(Math_) :: math ! a = params(1) ! Taylor expanision coefficient for sinc(a*sqrt(x) ) around x=0 ret = dble(2.0d0**(-1 - 2*k))*dble((-1.0d0)**k)*(params(1)**(2*k))*sqrt(math%pi) & /Gamma(1.0d0 + k*1.0d0)/Gamma(1.50d0 + k*1.0d0) !ret = dble(2.0d0**(-1-2*k))*dble((-1.0d0)**k)!*(params(1)**(2*k) ) end function ! ##################################################### function sinc_complex64(x) result(ret) complex(real64), intent(in) :: x complex(real64) :: ret if (abs(x) == 0.0d0) then ret = 1.0d0 else ret = sin(x)/x end if end function ! ##################################################### function sinc_real64(x) result(ret) real(real64), intent(in) :: x real(real64) :: ret if (abs(x) == 0.0d0) then ret = 1.0d0 else ret = sin(x)/x end if end function ! ##################################################### function sinc_real64_vec(x) result(ret) real(real64), intent(in) :: x(:) real(real64), allocatable :: ret(:) integer(int32) :: i allocate (ret(size(x))) do i = 1, size(x) ret(i) = sinc(x(i)) end do end function function to_diag_vector_to_CRS(diag_vec) result(ret) real(real64), intent(in) :: diag_vec(:) type(CRS_) :: ret call ret%eyes(size(diag_vec)) ret%val = diag_vec end function ! ################################################### subroutine to_real32_CRS(this) class(CRS_), intent(inout) :: this this%val_real32 = real(this%val) deallocate (this%val) this%dtype = real32 end subroutine ! ################################################### !! ################################################### !function divide_by_diagonal_vector_CRS(this,diag_vector) result(ret) ! type(CRS_),intent(in) :: this ! type(CRS_) :: ret ! real(real64),intent(in) :: diag_vector(:) ! integer(int32) :: i ! ! ret = this ! do i=1,ret%size() ! ret%val(ret%row_ptr(i):ret%row_ptr(i+1)-1) = & ! ret%val(ret%row_ptr(i):ret%row_ptr(i+1)-1)/diag_vector(i) ! enddo !end function !! ################################################### ! !function divide_by_scalar_CRS(this,scalar_value) result(ret) ! type(CRS_),intent(in) :: this ! real(real64),intent(in) :: scalar_value ! type(CRS_) :: ret ! ! ret = this ! ret%val = ret%val/scalar_value !end function !! ################################################### ! ################################################### function to_COO_from_DenseMatrix(dense_matrix) result(ret) type(COO_) :: ret real(real64), intent(in) :: dense_matrix(:, :) integer(int32) :: DOF, i, j DOF = size(dense_matrix, 1) call ret%init(DOF) do i = 1, size(dense_matrix, 1) do j = 1, size(dense_matrix, 2) call ret%add(i, j, dense_matrix(i, j)) end do end do end function ! ################################################### ! ################################################### function to_COO_from_ArrayObject(arrayobject) result(ret) type(COO_) :: ret type(Array_), intent(in) :: arrayobject real(real64), allocatable :: dense_matrix(:, :) integer(int32) :: DOF, i, j dense_matrix = arrayobject DOF = size(dense_matrix, 1) call ret%init(DOF) do i = 1, size(dense_matrix, 1) do j = 1, size(dense_matrix, 1) if (dense_matrix(i, j) == 0.0d0) cycle call ret%add(i, j, dense_matrix(i, j)) end do end do end function ! ################################################### ! ################################################### function to_CRS_from_DenseMatrix(dense_matrix) result(ret) type(COO_) :: coo type(CRS_) :: ret real(real64), intent(in) :: dense_matrix(:, :) coo = to_COO(dense_matrix) ret = coo%to_CRS() end function ! ################################################### ! ################################################### function to_CRS_from_ArrayObject(arrayobject) result(ret) type(COO_) :: coo type(CRS_) :: ret type(Array_), intent(in) :: arrayobject real(real64), allocatable :: dense_matrix(:, :) dense_matrix = arrayobject coo = to_COO(dense_matrix) ret = coo%to_CRS() end function ! ################################################### ! ################################################### function imag_ICOO(n) result(ret) integer(int32), intent(in) :: n type(COO_) :: ret integer(int32) :: i type(Math_) :: math call ret%init(n) if (n == 1) then call ret%add(1, 1, math%i) return end if if (mod(n, 2) == 0) then ! even do i = 1, n/2 call ret%add(i, n - i + 1, -1.0d0) end do do i = n/2 + 1, n call ret%add(i, n - i + 1, 1.0d0) end do else ! odd ! even do i = 1, (n - 1)/2 call ret%add(i, n, -1.0d0) end do call ret%add((n - 1)/2 + 1, n, math%i) do i = (n - 1)/2 + 2, n call ret%add(i, n, 1.0d0) end do end if end function ! ################################################### ! ################################################### function speyeCOO(n) result(ret) integeR(int32), intent(in) :: n type(COO_) :: ret call ret%eyes(n) end function ! ################################################### ! ################################################### subroutine fillSymmetric_CRS(this) class(CRS_),intent(in) :: this ! fill-in to symmetric parts ! under implementation! end subroutine ! ################################################### ! ################################################### function allocated_CRS(this) result(ret) class(CRS_),intent(in) :: this logical :: ret ret = allocated(this%val) ret = ret .and. allocated(this%row_ptr) ret = ret .and. allocated(this%col_idx) end function ! ################################################### ! ################################################### ! >> BCRS procedures ! ################################################### ! ################################################### ! subroutine assemble_CRS(this,crs_matrices,matrices_shape) ! class(CRS_),intent(inout) :: this ! type(CRS_),intent(in) :: crs_matrices(:) ! integer(int32),intent(in) :: matrices_shape(1:2) ! ! ! end subroutine ! ################################################### subroutine setBCRS(this,block_position,CRS) class(BCRS_),intent(inout) :: this integer(int32) :: block_position(1:2) type(CRS_),intent(in) :: CRS type(BCRS_) :: BCRS_buffer if(.not.allocated(this%CRS))then allocate(this%CRS(block_position(1),block_position(2))) endif if(& block_position(1)>size(this%CRS,1) & .or. & block_position(2)>size(this%CRS,2) )then BCRS_buffer = this deallocate(this%CRS) allocate(this%CRS( max(block_position(1),size(BCRS_buffer%CRS,1)),& max(block_position(2),size(BCRS_buffer%CRS,2)))) this%CRS(1:size(BCRS_buffer%CRS,1),1:size(BCRS_buffer%CRS,2)) = & BCRS_buffer%CRS(1:size(BCRS_buffer%CRS,1),1:size(BCRS_buffer%CRS,2)) endif this%CRS(block_position(1),block_position(2)) = CRS end subroutine ! ################################################### ! ################################################### subroutine addBCRS(this,block_position,CRS) class(BCRS_),intent(inout) :: this integer(int32) :: block_position(1:2) type(CRS_),intent(in) :: CRS type(BCRS_) :: BCRS_buffer if(.not.allocated(this%CRS))then allocate(this%CRS(block_position(1),block_position(2))) endif if(& block_position(1)>size(this%CRS,1) & .or. & block_position(2)>size(this%CRS,2) )then BCRS_buffer = this deallocate(this%CRS) allocate(this%CRS( max(block_position(1),size(BCRS_buffer%CRS,1)),& max(block_position(2),size(BCRS_buffer%CRS,2)))) this%CRS(1:size(BCRS_buffer%CRS,1),1:size(BCRS_buffer%CRS,2)) = & BCRS_buffer%CRS(1:size(BCRS_buffer%CRS,1),1:size(BCRS_buffer%CRS,2)) endif this%CRS(block_position(1),block_position(2)) = & this%CRS(block_position(1),block_position(2)) + CRS end subroutine ! ################################################### ! ################################################### subroutine showShapeBCRS(this) class(BCRS_),intent(in) :: this integer(int32) :: i,j character(:),allocatable :: msg integer(int32) :: this_shape(1:2) print *, "BCRS :: shape >> " do i=1,size(this%CRS,1) do j = 1, size(this%CRS,2) if(allocated(this%CRS(i,j)))then this_shape = this%CRS(i,j)%shape() msg = "[" + str(this_shape(1))+","+str(this_shape(2)) + "]" else msg = "[-0-]" endif write (*,fmt='(a)', advance='no') msg enddo write (*,fmt='(a)', advance='yes') "" enddo print *, "BCRS :: shape << " end subroutine ! ################################################### ! ################################################### function matmulBCRS(this,vec) result(ret) class(BCRS_),intent(in) :: this real(real64),intent(in) :: vec(:) real(real64),allocatable :: ret(:) integer(int32),allocatable :: non_zero_idx(:,:) integer(int32) :: i,j,row_r(1:2),col_r(1:2),bcrs_shape(1:2),non_zero_counter,row,col bcrs_shape = this%shape() ret = zeros(bcrs_shape(1)) ! Pick-up non-zero values non_zero_counter = 0 do i=1,size(this%CRS,1) do j=1,size(this%CRS,2) if(.not.allocated(this%CRS(i,j))) then cycle else non_zero_counter = non_zero_counter + 1 endif enddo enddo allocate(non_zero_idx(non_zero_counter,6)) non_zero_counter = 0 do i=1,size(this%CRS,1) do j=1,size(this%CRS,2) if(.not.allocated(this%CRS(i,j))) then cycle else non_zero_counter = non_zero_counter + 1 non_zero_idx(non_zero_counter,1) = i ! row of block non_zero_idx(non_zero_counter,2) = j ! column of block non_zero_idx(non_zero_counter,3:4) = this%row_range(i,j) ! global row idx non_zero_idx(non_zero_counter,5:6) = this%col_range(i,j) ! global column idx endif enddo enddo !$OMP parallel do private(row,col,row_r,col_r) reduction(+:ret) do i=1,non_zero_counter row = non_zero_idx(i,1) col = non_zero_idx(i,2) row_r = non_zero_idx(i,3:4) col_r = non_zero_idx(i,5:6) ret(row_r(1):row_r(2)) = ret(row_r(1):row_r(2))& + this%CRS(row,col)%matmul(vec(col_r(1):col_r(2))) enddo !$OMP end parallel do end function ! ################################################### ! ################################################### function row_range_BCRS(this,box_row,box_col) result(ret) class(BCRS_),intent(in) :: this integer(int32),intent(in) :: box_row,box_col integer(int32) :: ret(1:2),this_shape(1:2) integer(int32),allocatable :: row_len(:) integer(int32) :: i,j ret(1:2) = [1,0] do i=1,box_row-1 ! for each box_row row_len = int(zeros(size(this%CRS,1))) do j=1,size(this%CRS,2) row_len(j) = 1 .of. this%CRS(i,j)%shape() enddo ret(1) = ret(1) + maxval(row_len) enddo row_len = int(zeros(size(this%CRS,1))) do j=1,size(this%CRS,2) row_len(j) = 1 .of. this%CRS(box_row,j)%shape() enddo ret(2) = ret(1) + maxval(row_len) -1 end function ! ################################################### ! ################################################### function col_range_BCRS(this,box_row,box_col) result(ret) class(BCRS_),intent(in) :: this integer(int32),intent(in) :: box_row,box_col integer(int32) :: ret(1:2),this_shape(1:2) integer(int32),allocatable :: col_len(:) integer(int32) :: i,j ret(1:2) = [1,0] do i=1,box_col-1 ! for each box_col col_len = int(zeros(size(this%CRS,2))) do j=1,size(this%CRS,2) col_len(j) = 2 .of. this%CRS(j,i)%shape() enddo ret(1) = ret(1) + maxval(col_len) enddo col_len = int(zeros(size(this%CRS,2))) do j=1,size(this%CRS,2) col_len(j) = 2 .of. this%CRS(j,box_col)%shape() enddo ret(2) = ret(1) + maxval(col_len) -1 !this_shape = this%CRS(box_row,box_col)%shape() !if (this_shape(2)==0)then ! ret(2) = ret(1) !else ! ret(2) = ret(1) + this_shape(2) -1 !endif end function ! ################################################### function shapeBCRS(this) result(ret) class(BCRS_),intent(in) :: this integer(int32) :: ret(1:2) ret(1) = maxval(this%row_range(size(this%CRS,1),size(this%CRS,2))) ret(2) = maxval(this%col_range(size(this%CRS,1),size(this%CRS,2))) end function ! ################################################### ! ################################################### function to_dense_BCRS(this) result(ret) class(BCRS_),intent(in) :: this real(real64),allocatable :: ret(:,:) integer(int32) :: n_row,n_col,i,j,bcrs_row,bcrs_col,row_1,row_2,col_1,col_2 ret = zeros(1 .of. this%shape(), 2 .of. this%shape()) do bcrs_row=1,size(this%crs,1) do bcrs_col=1,size(this%crs,2) if(.not.allocated(this%crs(bcrs_row,bcrs_col))) cycle row_1 = 1 .of. this%row_range(bcrs_row,bcrs_col) row_2 = 1 .of. this%row_range(bcrs_row,bcrs_col) + (1 .of. this%crs(bcrs_row,bcrs_col)%shape()) -1 col_1 = 1 .of. this%col_range(bcrs_row,bcrs_col) col_2 = 1 .of. this%col_range(bcrs_row,bcrs_col) + (2 .of. this%crs(bcrs_row,bcrs_col)%shape()) -1 ret( & row_1: row_2 ,col_1: col_2 ) = this%crs(bcrs_row,bcrs_col)%to_dense() enddo enddo end function ! ################################################### ! ################################################### function expBCRS(this,vec,max_itr,fix_idx) result(b) ! tensor exponential ! exp(x) = \sum_{0}^{\infty} \cfrac{1}{n!}x^{n} ! let a_{k} = x^{k}, b_{k} = \sum_{0}^{k} \cfrac{1}{n!}x^{n} ! b_{0} = 1 ! a_{0} = 1 ! a_{k+1} = \cfrac{1}{k} x*a_{k} ! b_{k+1} = b_{k} + a_{k+1} class(BCRS_),intent(in) :: this real(real64),intent(in) :: vec(:) integer(int32),optional,intent(in) :: max_itr,fix_idx(:) integer(int32) :: k,n,itr_max,this_shape(1:2) real(real64) :: tol real(real64),allocatable :: a(:),b(:) itr_max = input(default=100,option=max_itr) a = vec(:) b = vec(:) do k=1,itr_max a = 1.0d0/dble(k)*this%matmul(a) if(present(fix_idx))then if(size(fix_idx)>=1)then a(fix_idx(:))=0.0d0 b(fix_idx(:))=0.0d0 endif endif b = b + a enddo end function ! ################################################### end module SparseClass