This loop has less efficient memory access patterns than a nearby outer loop. To fix: Reorder the loops if possible.
Interchanging is not always possible: when iterations depend on one another, reordering the loops can change the results.…
! Loop order j -> i -> k: the innermost index k is the SECOND subscript of
! aMatrix(i,k). Fortran stores arrays column-major (first subscript varies
! fastest), so consecutive k iterations read aMatrix with a stride instead of
! walking adjacent elements.
do j=1,arrSize
do i=1,arrSize
do k=1,arrSize
cMatrix(i,j) = cMatrix(i,j) + aMatrix(i,k) * bMatrix(k,j)
end do
end do
end do
…
! Accumulate the matrix product aMatrix * bMatrix into cMatrix.
!
! Only the leading arrSize-by-arrSize part of each array is touched.
! cMatrix is NOT cleared first: each product term is added to the value
! already stored, so the caller decides the starting contents.
! No check is made that the arrays are at least arrSize in each dimension.
!
! Loop order here is j -> i -> k. With k innermost, aMatrix(i,k) is read
! along its second subscript, i.e. with a stride in column-major storage.
subroutine matrix_multiply(arrSize, aMatrix, bMatrix, cMatrix)
  implicit none
  integer, intent(in)    :: arrSize
  real,    intent(in)    :: aMatrix(:,:), bMatrix(:,:)
  real,    intent(inout) :: cMatrix(:,:)
  integer :: i, j, k

  columns: do j = 1, arrSize
    rows: do i = 1, arrSize
      ! Innermost loop runs over the summation index k.
      terms: do k = 1, arrSize
        cMatrix(i, j) = cMatrix(i, j) + aMatrix(i, k) * bMatrix(k, j)
      end do terms
    end do rows
  end do columns
end subroutine matrix_multiply
…
! Loop order j -> k -> i: the innermost index i is the FIRST subscript of
! both cMatrix(i,j) and aMatrix(i,k). Fortran stores arrays column-major, so
! consecutive i iterations walk down a column element by element, while
! bMatrix(k,j) does not change inside the i loop.
do j=1,arrSize
do k=1,arrSize
do i=1,arrSize
cMatrix(i,j) = cMatrix(i,j) + aMatrix(i,k) * bMatrix(k,j)
end do
end do
end do
…
! Accumulate the matrix product aMatrix * bMatrix into cMatrix.
!
! Only the leading arrSize-by-arrSize part of each array is touched.
! cMatrix is NOT cleared first: each product term is added to the value
! already stored, so the caller decides the starting contents.
! No check is made that the arrays are at least arrSize in each dimension.
!
! Loop order here is j -> k -> i. With i innermost, cMatrix(i,j) and
! aMatrix(i,k) are traversed along their first subscript, which is the
! fastest-varying one in column-major storage.
subroutine matrix_multiply(arrSize, aMatrix, bMatrix, cMatrix)
  implicit none
  integer, intent(in)    :: arrSize
  real,    intent(in)    :: aMatrix(:,:), bMatrix(:,:)
  real,    intent(inout) :: cMatrix(:,:)
  integer :: i, j, k

  columns: do j = 1, arrSize
    terms: do k = 1, arrSize
      ! Innermost loop runs over the row index i; bMatrix(k,j) is fixed here.
      rows: do i = 1, arrSize
        cMatrix(i, j) = cMatrix(i, j) + aMatrix(i, k) * bMatrix(k, j)
      end do rows
    end do terms
  end do columns
end subroutine matrix_multiply