using FASTX
using BioSequences
using CodecZlib
"""
    demux(sample_table, fastq_file, mismatch)

Assign the reads of a gzip-compressed FASTQ file to samples by their index barcode and
print the name of every sample that received at least one read.

# Arguments
- `sample_table`: path to a comma-separated sample sheet. Column 4 of each line is used
  as the barcode and column 2 as the sample name. Blank lines are skipped.
- `fastq_file`: path to a gzip-compressed FASTQ file. The barcode of a read is the text
  after the last `:` of its description.
- `mismatch`: maximum number of barcode mismatches (Hamming distance), given as a string
  such as `"0"` (e.g. taken straight from `ARGS`).

# Returns
`nothing`; the sample names are written to standard output, one per line.
"""
function demux(sample_table, fastq_file, mismatch)
    # barcode => sample name, read from the sample sheet.
    barcode_sample_dict = Dict{String,String}()
    open(sample_table, "r") do io
        for line in eachline(io)
            # A blank (e.g. trailing) line has no columns to index.
            isempty(strip(line)) && continue
            fields = split(line, ",")
            barcode_sample_dict[fields[4]] = fields[2]
        end
    end

    # Build the barcode sequences from the very vector that is indexed later, so the
    # index returned by `demultiplex` always refers to the matching barcode string.
    barcode_strings = collect(keys(barcode_sample_dict))
    barcodes = LongDNASeq.(barcode_strings)
    dplxr = Demultiplexer(barcodes; n_max_errors=parse(Int, mismatch), distance=:hamming)

    # The values are FASTQ records, so the dictionary must be typed accordingly; declaring
    # it as Dict{String,String} makes the assignment below fail with a MethodError.
    # Only the most recent record of each sample is kept, as in the original logic.
    output_dict = Dict{String,FASTQ.Record}()
    reader = FASTQ.Reader(GzipDecompressorStream(open(fastq_file)))
    try
        for record in reader
            index = last(split(FASTQ.description(record), ":"))
            hit = first(demultiplex(dplxr, LongDNASeq(index)))
            # An index of 0 means no barcode matched within the allowed mismatches.
            hit == 0 && continue
            sample = barcode_sample_dict[barcode_strings[hit]]
            output_dict[sample] = record
        end
    finally
        # Release the reader (and the stream it wraps) even if a record fails to parse.
        close(reader)
    end

    for sample in keys(output_dict)
        println(sample)
    end
    return nothing
end
# Script entry point: the sample sheet, the gzipped FASTQ file and the allowed number of
# barcode mismatches are taken from the first three command-line arguments.
if length(ARGS) < 3
    # Fail with a usage hint instead of a bare BoundsError on ARGS.
    throw(ArgumentError(
        "usage: julia $(PROGRAM_FILE) <sample_table.csv> <reads.fastq.gz> <mismatch>"
    ))
end
@time demux(ARGS[1], ARGS[2], ARGS[3])
#= Observed run (shell command followed by Julia's output):

$ julia demux_func.jl SampleSheet.NEXTflex1.csv 17-4-F-BR_S1_L001_R1_001_head4000.fastq.gz 0
ERROR: LoadError: MethodError: Cannot `convert` an object of type FASTX.FASTQ.Record to an object of type String
Closest candidates are:
  convert(::Type{String}, ::BioSequence, ::BioSequences.AsciiAlphabet) at /Users/xzhong/.julia/packages/BioSequences/k4j4J/src/biosequence/conversion.jl:20
  convert(::Type{T}, ::T) where T<:AbstractString at strings/basic.jl:229
  convert(::Type{T}, ::AbstractString) where T<:AbstractString at strings/basic.jl:230
  ...
Stacktrace:
 [1] setindex!( ::Dict{String,String}, ::FASTX.FASTQ.Record, ::String ) at ./dict.jl:380
 [2] demux( ::String, ::String, ::String ) at /Users/xzhong/scripts/julia/demux_func.jl:46
 [3] top-level scope at ./timing.jl:174
 [4] include( ::Function, ::Module, ::String ) at ./Base.jl:380
 [5] include( ::Module, ::String ) at ./Base.jl:368
 [6] exec_options( ::Base.JLOptions ) at ./client.jl:296
 [7] _start() at ./client.jl:506
in expression starting at /Users/xzhong/scripts/julia/demux_func.jl:61
=#