Is this the right place to ask for a quick code review from anyone with their expertise to offer?
This is a simplified version of my code.
I am reading a few files and their contents are streamed to multiple other files (filename dependent on the results of their contents passed through a function).
Here is some executable example code which I hope explains what I am doing. I added some print statements so that you can see the order in which the operations happen.
# Start from a clean scratch directory so repeated runs of this example never see stale files.
tmp_dir = "/tmp/discourse-question-1234"
rm(tmp_dir, force=true, recursive=true)
mkdir(tmp_dir)

# Generate the example ".fakeq" inputs: one record per line, shaped `<id>_<sample>_<sequence>`.
# The real inputs are ".fastq" files (see https://en.wikipedia.org/wiki/FASTQ_format) where the
# sample is not yet known at this stage; the format is simplified to keep the question focused.
for (pool_name, records) in (
    "pool1" => [
        "id1_sample1_ACGTA",
        "id2_sample3_CGTACG",
        "id3_sample2_GTACTAC",
        "id4_sample1_TACGGTAC",
        "id5_sample2_ACGTGTACG",
        "id6_sample3_CGTATACGTA",
        "id7_sample2_GTACCGTAC",
        "id8_sample1_TACGGTAC",
        "id9_sample1_ACGTGTA",
    ],
    "pool2" => [
        "id10_sample2_ACGTAACGTA",
        "id11_sample1_CGTACGCGTACG",
        "id12_sample3_GTACTACGTACTAC",
        "id13_sample2_TACGGTACTACGGTAC",
        "id14_sample1_ACGTGTACGACGTGTACG",
        "id15_sample3_CGTATACGTACGTATACGTA",
        "id16_sample2_GTACCGTACGTACCGTAC",
        "id17_sample1_TACGGTACTACGGTAC",
        "id18_sample1_ACGTGTAACGTGTA",
    ],
)
    open("$(tmp_dir)/$(pool_name).fakeq", "w") do f
        # Every record is newline-terminated, including the last one.
        for record in records
            write(f, record, "\n")
        end
    end
end
# Column names of every per-sample CSV, in output order.
# (Roughly 10 - 20 entries in the real problem.)
csv_header = [:identifier, :sample_name, :sequence, :sequence_length]

# Samples that each get their own CSV file. (Roughly 25 - 50 entries in the real problem.)
# In the real problem this list is known up front, and a read's sample_name is determined by
# matching an array of nucleotide 'barcode' sequences against each sequence in the .fastq files.
sample_names = [:sample1, :sample2, :sample3]

# Pools to read. (Roughly 4 - 12 entries in the real problem.)
# In the real problem this list is known up front and each pool corresponds to one of the
# .fastq files mentioned above.
pool_list = [:pool1, :pool2]
# Demultiplex every pool file into one CSV per sample.
#
# `sample_csv_mapping` maps a sample name to the open CSV stream receiving that sample's rows.
# It must be created *before* `try`: a `try` block introduces its own local scope, so a variable
# first assigned inside it is local to that block and is not visible from `finally`. Inside the
# block we only mutate the Dict (`mapping[key] = value`), which never creates a new binding.
# The concrete key/value types also avoid an untyped `Dict{Any,Any}`.
sample_csv_mapping = Dict{Symbol,IOStream}()
try
    # Open one CSV per sample up front and write its header row.
    for sample_name in sample_names
        csv_stream = open("$(tmp_dir)/$(sample_name).csv", "w")
        # Register the stream right away so `finally` closes it even if a later call throws.
        sample_csv_mapping[sample_name] = csv_stream
        println("OPENED: $(csv_stream.name)")
        write(csv_stream, join(csv_header, ","), "\n")
    end

    for pool in pool_list
        open("$(tmp_dir)/$(pool).fakeq", "r") do f
            # `eachline` streams the file lazily; `readlines` would load the whole file into
            # memory first, which does not scale to real FASTQ inputs.
            for line in eachline(f)
                # Skip blank lines (e.g. a trailing empty line) instead of failing on the split.
                isempty(strip(line)) && continue

                identifier, sample_name, sequence = split(line, "_")

                # `eval.(csv_header)` cannot work here: `eval` always evaluates in the module's
                # *global* scope, while `identifier`, `sample_name`, ... are locals of this
                # closure, so the lookup raises UndefVarError. Collecting the fields in a
                # NamedTuple and indexing it by the header symbols gives the intended
                # "row follows csv_header" behaviour without `eval`.
                record = (
                    identifier = identifier,
                    sample_name = sample_name,
                    sequence = sequence,
                    sequence_length = length(sequence),
                )

                # Look the target stream up once; an unknown sample raises KeyError.
                csv_stream = sample_csv_mapping[Symbol(sample_name)]
                write(csv_stream, join((record[column] for column in csv_header), ","), "\n")
                println("WROTE_TO: $(csv_stream.name)")
            end
        end
    end
finally
    # Runs on success and on error alike: close every stream that was successfully opened.
    for (sample, csv_stream) in sample_csv_mapping
        close(csv_stream)
        println("CLOSED: $(csv_stream.name)")
    end
end
I especially want to know why the commented-out line `#write(sample_csv_mapping[Symbol(sample_name)], join(eval.(csv_header), ","), "\n")` does not work.
Also, when `sample_csv_mapping` is defined inside the `try` block, the `finally` block is unable to access it (see the commented-out line `#sample_csv_mapping = Dict()`).
Also, is there a more correct way in which to open a few files, write to them in a random and repeated order, then close all of them (even in case of an error)?