Hi,
I would like to update the sequence of a list of records in a BAM file. Looking at the code of record.jl, data is stored as Vector{UInt64} and some dark magic is used to convert to a DNA sequence.
As it looks very low level, how can I change the sequence ?
Thanks,
I don’t think it’s possible at the moment, you would have to write a function that does the inverse of BAM.sequence basically :
function hastemplength(record::Record)
return isfilled(record)
end
"""
sequence(record::Record)::BioSequences.LongDNA{4}
Get the segment sequence of `record`.
"""
function sequence(record::Record)
checkfilled(record)
seqlen = seqlength(record)
if seqlen == 0
return nothing
end
data = Vector{UInt64}(undef, cld(seqlen, 16))
src::Ptr{UInt64} = pointer(record.data, seqname_length(record) + n_cigar_op(record, false) * 4 + 1)
for i in 1:lastindex(data)
# copy data flipping high and low nybble
x = unsafe_load(src, i)
Maybe a workaround would be to write back to a SAM file and then convert to BAM with samtools. Looks like there’s string constructor for SAM :
@test isa(collect(header), Vector{SAM.MetaInfo})
end
@testset "Record" begin
record = SAM.Record()
@test !isfilled(record)
@test !SAM.ismapped(record)
@test repr(record) == "XAM.SAM.Record: <not filled>"
@test_throws ArgumentError SAM.flag(record)
record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*")
@test isfilled(record)
@test occursin(r"^XAM.SAM.Record:\n", repr(record))
@test SAM.ismapped(record)
@test SAM.isprimary(record)
@test SAM.hastempname(record)
@test SAM.tempname(record) == "r001"
@test SAM.hasflag(record)
@test SAM.flag(record) === UInt16(99)
@test SAM.hasrefname(record)
@test SAM.refname(record) == "chr1"