Skip to content

Commit

Permalink
Merge pull request #961 from zoziha/buffer-1
Browse files Browse the repository at this point in the history
Improve text file reading performance
  • Loading branch information
henilp105 committed Mar 29, 2024
2 parents 07fce84 + 4cf8c21 commit d3dd5d4
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 27 deletions.
74 changes: 55 additions & 19 deletions src/fpm_filesystem.F90
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
!!
module fpm_filesystem
use,intrinsic :: iso_fortran_env, only : stdin=>input_unit, stdout=>output_unit, stderr=>error_unit
use,intrinsic :: iso_c_binding, only: c_new_line
use fpm_environment, only: get_os_type, &
OS_UNKNOWN, OS_LINUX, OS_MACOS, OS_WINDOWS, &
OS_CYGWIN, OS_SOLARIS, OS_FREEBSD, OS_OPENBSD
use fpm_environment, only: separator, get_env, os_is_unix
use fpm_strings, only: f_string, replace, string_t, split, dilate, str_begins_with_str
use fpm_strings, only: f_string, replace, string_t, split, split_first_last, dilate, str_begins_with_str
use iso_c_binding, only: c_char, c_ptr, c_int, c_null_char, c_associated, c_f_pointer
use fpm_error, only : fpm_stop, error_t, fatal_error
implicit none
Expand Down Expand Up @@ -50,6 +51,8 @@ end function c_is_dir
end interface
#endif

character(*), parameter :: eol = new_line('a') !! End of line

contains

!> Extract filename from path with/without suffix
Expand Down Expand Up @@ -302,37 +305,71 @@ integer function number_of_rows(s) result(nrows)
end function number_of_rows

!> read lines into an array of TYPE(STRING_T) variables expanding tabs
function read_lines_expanded(fh) result(lines)
integer, intent(in) :: fh
function read_lines_expanded(filename) result(lines)
character(len=*), intent(in) :: filename
type(string_t), allocatable :: lines(:)

integer :: i
integer :: iostat
character(len=:),allocatable :: line_buffer_read
character(len=:), allocatable :: content
integer, allocatable :: first(:), last(:)

content = read_text_file(filename)
if (len(content) == 0) then
allocate (lines(0))
return
end if

call split_first_last(content, eol, first, last) ! TODO: \r (< macOS X), \n (>=macOS X/Linux/Unix), \r\n (Windows)

allocate(lines(number_of_rows(fh)))
do i = 1, size(lines)
call getline(fh, line_buffer_read, iostat)
lines(i)%s = dilate(line_buffer_read)
! allocate lines from file content string
allocate (lines(size(first)))
do i = 1, size(first)
allocate(lines(i)%s, source=dilate(content(first(i):last(i))))
end do

end function read_lines_expanded

!> read lines into an array of TYPE(STRING_T) variables
function read_lines(fh) result(lines)
integer, intent(in) :: fh
function read_lines(filename) result(lines)
character(len=*), intent(in) :: filename
type(string_t), allocatable :: lines(:)

integer :: i
integer :: iostat
character(len=:), allocatable :: content
integer, allocatable :: first(:), last(:)

allocate(lines(number_of_rows(fh)))
do i = 1, size(lines)
call getline(fh, lines(i)%s, iostat)
content = read_text_file(filename)
if (len(content) == 0) then
allocate (lines(0))
return
end if

call split_first_last(content, eol, first, last) ! TODO: \r (< macOS X), \n (>=macOS X/Linux/Unix), \r\n (Windows)

! allocate lines from file content string
allocate (lines(size(first)))
do i = 1, size(first)
allocate(lines(i)%s, source=content(first(i):last(i)))
end do

end function read_lines

!> read text file into a string
function read_text_file(filename) result(string)
character(len=*), intent(in) :: filename
character(len=:), allocatable :: string
integer :: fh, length

open (newunit=fh, file=filename, status='old', action='read', &
access='stream', form='unformatted')
inquire (fh, size=length)
allocate (character(len=length) :: string)
if (length == 0) return
read (fh) string
close (fh)

end function read_text_file

!> Create a directory. Create subdirectories as needed
subroutine mkdir(dir, echo)
character(len=*), intent(in) :: dir
Expand Down Expand Up @@ -480,9 +517,8 @@ recursive subroutine list_files(dir, files, recurse)
call fpm_stop(2,'*list_files*:directory listing failed')
end if

open (newunit=fh, file=temp_file, status='old')
files = read_lines(fh)
close(fh,status="delete")
files = read_lines(temp_file)
call delete_file(temp_file)

do i=1,size(files)
files(i)%s = join_path(dir,files(i)%s)
Expand Down Expand Up @@ -678,7 +714,7 @@ subroutine getline(unit, line, iostat, iomsg)
!> Error message
character(len=:), allocatable, optional :: iomsg

integer, parameter :: BUFFER_SIZE = 32768
integer, parameter :: BUFFER_SIZE = 1024
character(len=BUFFER_SIZE) :: buffer
character(len=256) :: msg
integer :: size
Expand Down
8 changes: 2 additions & 6 deletions src/fpm_source_parsing.f90
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,7 @@ function parse_f_source(f_filename,error) result(f_source)

f_source%file_name = f_filename

open(newunit=fh,file=f_filename,status='old')
file_lines = read_lines_expanded(fh)
close(fh)
file_lines = read_lines_expanded(f_filename)

! for efficiency in parsing make a lowercase left-adjusted copy of the file
! Need a copy because INCLUDE (and #include) file arguments are case-sensitive
Expand Down Expand Up @@ -427,9 +425,7 @@ function parse_c_source(c_filename,error) result(c_source)
allocate(c_source%modules_provided(0))
allocate(c_source%parent_modules(0))

open(newunit=fh,file=c_filename,status='old')
file_lines = read_lines(fh)
close(fh)
file_lines = read_lines(c_filename)

! Ignore empty files, returned as FPM_UNIT_UNKNOWN
if (len_trim(file_lines) < 1) then
Expand Down
73 changes: 71 additions & 2 deletions src/fpm_strings.f90
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
!! - [[LOWER]] Changes a string to lowercase over optional specified column range
!!### Parsing and joining
!! - [[SPLIT]] parse string on delimiter characters and store tokens into an allocatable array
!! - [[SPLIT_FIRST_LAST]] Computes the first and last indices of tokens in input string, delimited by the characters in set,
!! and stores them into first and last output arrays.
!! - [[STRING_CAT]] Concatenate an array of **type(string_t)** into a single **CHARACTER** variable
!! - [[JOIN]] append an array of **CHARACTER** variables into a single **CHARACTER** variable
!!### Testing
Expand Down Expand Up @@ -40,7 +42,7 @@ module fpm_strings
implicit none

private
public :: f_string, lower, upper, split, str_ends_with, string_t, str_begins_with_str
public :: f_string, lower, upper, split, split_first_last, str_ends_with, string_t, str_begins_with_str
public :: to_fortran_name, is_fortran_name
public :: string_array_contains, string_cat, len_trim, operator(.in.), fnv_1a
public :: replace, resize, str, join, glob
Expand Down Expand Up @@ -518,6 +520,73 @@ subroutine split(input_line,array,delimiters,order,nulls)
enddo
end subroutine split

!! Author: Milan Curcic
!! Computes the first and last indices of tokens in input string, delimited
!! by the characters in set, and stores them into first and last output
!! arrays.
pure subroutine split_first_last(string, set, first, last)
character(*), intent(in) :: string
character(*), intent(in) :: set
integer, allocatable, intent(out) :: first(:)
integer, allocatable, intent(out) :: last(:)

integer, dimension(len(string) + 1) :: istart, iend
integer :: p, n, slen

slen = len(string)

n = 0
if (slen > 0) then
p = 0
do while (p < slen)
n = n + 1
istart(n) = min(p + 1, slen)
call split_pos(string, set, p)
iend(n) = p - 1
end do
end if

first = istart(:n)
last = iend(:n)

end subroutine split_first_last

!! Author: Milan Curcic
!! If back is absent, computes the leftmost token delimiter in string whose
!! position is > pos. If back is present and true, computes the rightmost
!! token delimiter in string whose position is < pos. The result is stored
!! in pos.
pure subroutine split_pos(string, set, pos, back)
character(*), intent(in) :: string
character(*), intent(in) :: set
integer, intent(in out) :: pos
logical, intent(in), optional :: back

logical :: backward
integer :: result_pos, bound

if (len(string) == 0) then
pos = 1
return
end if

!TODO use optval when implemented in stdlib
!backward = optval(back, .false.)
backward = .false.
if (present(back)) backward = back

if (backward) then
bound = min(len(string), max(pos - 1, 0))
result_pos = scan(string(:bound), set, back=.true.)
else
result_pos = scan(string(min(pos + 1, len(string)):), set) + pos
if (result_pos < pos + 1) result_pos = len(string) + 1
end if

pos = result_pos

end subroutine split_pos

!> Returns string with characters in charset replaced with target_char.
pure function replace(string, charset, target_char) result(res)
character(*), intent(in) :: string
Expand Down Expand Up @@ -1371,7 +1440,7 @@ subroutine remove_newline_characters(string)

integer :: feed,length

character(*), parameter :: CRLF = new_line('a')//achar(13)
character(*), parameter :: CRLF = achar(13)//new_line('a')
character(*), parameter :: SPACE = ' '

call remove_characters_in_set(string%s,set=CRLF,replace_with=SPACE)
Expand Down

0 comments on commit d3dd5d4

Please sign in to comment.