"""
    parseFiles(inputFile::String, outFile::String)

Read `inputFile` line by line and write a reformatted copy to `outFile`.

Lines starting with `#` are skipped. Every other line is split on tab characters;
the first four fields are joined with `-`, followed by a tab, the fifth field, a
tab, and the sixth field. Any fields beyond the sixth are ignored.

`outFile` is created or truncated. The output stream is flushed and closed even
if an error occurs part-way through, so rows written before the failure are kept.

# Throws
- `BoundsError` if a non-comment line has fewer than six tab-separated fields.
"""
function parseFiles(inputFile::String,outFile::String)
    # The do-block form guarantees the handle is flushed and closed on every exit
    # path, including exceptions raised while reading or indexing `fields`.
    open(outFile, "w") do outf
        for line in eachline(inputFile)
            # Comment lines are dropped entirely.
            startswith(line, "#") && continue
            fields = split(line, '\t')
            println(outf, fields[1], "-", fields[2], "-", fields[3], "-", fields[4],
                    "\t", fields[5], "\t", fields[6])
        end
    end
    return nothing
end
This code reads a text file line by line, splits each line on "\t", combines some of the columns, and writes the selected columns to a new file. How can we write an asynchronous version of this code to balance the time spent on I/O and on computation?
I'm not an expert on optimizing I/O operations, but my impression is that with such a minimal example, where it is not clear why you need to optimize the code, it will be hard to get help.
Maybe you will get better feedback if you provide a clearer picture of what exactly you are trying to achieve on a larger scale.
Channels are nice for coordinating asynchronous code:
"""
    parseFilesAsync(inputFile::String, outFile::String)

Read `inputFile` in a spawned producer task and write the reformatted rows to
`outFile` from the calling task, with a bounded `Channel` between the two.

Lines starting with `#` are skipped. Every other line is split on tab characters
by the producer; the consumer writes the first four fields joined with `-`,
followed by a tab, the fifth field, a tab, and the sixth field.

`outFile` is created or truncated. Rows are written in input order because there
is a single producer and a single consumer.

# Throws
- `BoundsError` if a non-comment line has fewer than six tab-separated fields.
- The exception wrapping a producer failure (for example an unreadable
  `inputFile`), which is rethrown when the consumer iterates the channel.
"""
function parseFilesAsync(inputFile::String,outFile::String)
    # Number of parsed lines the producer may queue before it blocks on `put!`.
    buf_size = 10
    # `split` on a `String` yields `SubString{String}` elements; using that as the
    # channel element type avoids copying every field into a fresh `String`.
    chan = Channel{Vector{SubString{String}}}(buf_size; spawn = true) do ch
        for line in eachline(inputFile)
            startswith(line, "#") && continue
            put!(ch, split(line, '\t'))
        end
    end
    try
        open(outFile, "w") do outf
            for fields in chan
                println(outf, fields[1], "-", fields[2], "-", fields[3], "-", fields[4],
                        "\t", fields[5], "\t", fields[6])
            end
        end
    finally
        # If writing fails, closing the channel makes the producer's pending
        # `put!` throw, so the spawned task ends instead of blocking forever.
        close(chan)
    end
    return nothing
end